Python Dataset Beispiele

Programmiersprache: Python

Namespace / Paketname: dataset_describe

Klasse / Typ: Dataset

Beispiele auf hotexamples.com: 7

Python Dataset - 7 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Klasse dataset_describe.Dataset, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

Dataset(4)

corr_with_dependent_abs_std(1)

symbols_std(1)

symbols_min(1)

symbols_mean(1)

symbols_max(1)

n_rows(1)

n_numerical(1)

n_columns(1)

n_classes(1)

n_categorical(1)

corr_with_dependent_abs_min(1)

_get_symbols_per_category(1)

corr_with_dependent_abs_median(1)

corr_with_dependent_abs_mean(1)

corr_with_dependent_abs_max(1)

corr_with_dependent_abs_75p(1)

corr_with_dependent_abs_25p(1)

class_prob_std(1)

class_prob_min(1)

class_prob_median(1)

class_prob_mean(1)

class_prob_max(1)

symbols_sum(1)

Beispiel #1

Datei anzeigen

Datei: tests_dataset_describe.py Projekt: rhiever/sklearn-benchmarks

    def setUp(self):
        """Build the two Dataset fixtures shared by the tests.

        ``iris.csv`` is wrapped with Dataset's default arguments and stored
        as ``self.iris``; ``tips.csv`` is wrapped with ``tip`` as the
        dependent column and stored as ``self.tips``.
        """
        # Classification fixture: no dependent column is passed explicitly.
        self.iris = Dataset(pd.read_csv("iris.csv"))

        # Regression fixture: "tip" is named as the dependent column.
        self.tips = Dataset(pd.read_csv("tips.csv"), dependent_col="tip")

Beispiel #2

Datei anzeigen

    def setUp(self):
        """Load the iris and tips CSV files and wrap each in a Dataset."""
        iris_frame = pd.read_csv('iris.csv')
        # Used by the tests as the classification example.
        self.iris = Dataset(iris_frame)

        tips_frame = pd.read_csv('tips.csv')
        # Used by the tests as the regression example; 'tip' is the target.
        self.tips = Dataset(tips_frame, dependent_col='tip')

Beispiel #3

Datei anzeigen

Datei: get_metafeatures.py Projekt: weklica/pennai

def generate_metafeatures_from_server(file_id, target_field, **kwargs):
    """Generate metafeatures for a data set fetched by its file id.

    The raw text returned by ``get_file_from_server(file_id)`` is parsed
    with ``pd.read_csv`` using ``sep=None`` and the ``'python'`` engine,
    then wrapped in a ``Dataset`` whose prediction type is fixed to
    ``'classification'``.

    Args:
        file_id: identifier handed to ``get_file_from_server``.
        target_field: name of the dependent column.
        **kwargs: forwarded unchanged to ``pd.read_csv``.

    Returns:
        Whatever ``generate_metafeatures(dataset, target_field)`` returns.
    """
    # Pull the file contents into memory as text.
    csv_text = get_file_from_server(file_id)
    frame = pd.read_csv(
        StringIO(csv_text), sep=None, engine='python', **kwargs
    )

    described = Dataset(
        frame, dependent_col=target_field, prediction_type='classification'
    )
    return generate_metafeatures(described, target_field)

Beispiel #4

Datei anzeigen

Datei: get_metafeatures.py Projekt: zldeng/sklearn-benchmarks

def get_metafeatures(df):
    """Collect every public callable attribute of a Dataset as a metafeature.

    ``df`` is wrapped in a ``Dataset`` with ``'class'`` as the dependent
    column and ``'classification'`` as the prediction type. Each attribute
    whose name does not start with an underscore and that is callable is
    then invoked with no arguments.

    Args:
        df: data handed straight to ``Dataset`` (presumably a pandas
            DataFrame containing a ``'class'`` column -- confirm against
            callers).

    Returns:
        OrderedDict mapping attribute name to the value returned by calling
        that attribute, in the order produced by ``dir(dataset)``.
    """
    dataset = Dataset(df, dependent_col='class', prediction_type='classification')

    meta_features = OrderedDict()
    for name in dir(dataset):
        # One leading-underscore check covers both private and dunder names.
        # Filtering on the name first means private attributes (including
        # properties) are never evaluated just to be discarded.
        if name.startswith('_'):
            continue
        attribute = getattr(dataset, name)
        if callable(attribute):
            meta_features[name] = attribute()
    return meta_features

Beispiel #5

Datei anzeigen

Datei: get_metafeatures.py Projekt: weklica/pennai

def generate_metafeatures_from_filepath(input_file, target_field, **kwargs):
    """Generate metafeatures for a data set read from ``input_file``.

    The file is parsed with ``pd.read_csv`` using ``sep=None`` and the
    ``'python'`` engine, then wrapped in a ``Dataset`` whose prediction type
    is fixed to ``'classification'``.

    Args:
        input_file: path or file-like object accepted by ``pd.read_csv``.
        target_field: name of the dependent column.
        **kwargs: forwarded unchanged to ``pd.read_csv``.

    Returns:
        Whatever ``generate_metafeatures(dataset, target_field)`` returns.
    """
    # Load the whole data set into memory.
    frame = pd.read_csv(input_file, sep=None, engine='python', **kwargs)

    described = Dataset(
        frame, dependent_col=target_field, prediction_type='classification'
    )
    return generate_metafeatures(described, target_field)

Beispiel #6

Datei anzeigen

class Dataset_Describe(unittest.TestCase):
    """Checks Dataset metafeatures against the iris and tips CSV files.

    ``self.iris`` serves as the classification example and ``self.tips``
    (dependent column ``tip``) as the regression example.
    """

    def setUp(self):
        """Load both CSV files and wrap each one in a Dataset."""
        # Classification fixture: Dataset defaults are used.
        self.iris = Dataset(pd.read_csv('iris.csv'))

        # Regression fixture: 'tip' is the dependent column.
        self.tips = Dataset(pd.read_csv('tips.csv'), dependent_col='tip')

    # --- basic shape and column-type counts (iris) ---

    def test_number_of_rows(self):
        self.assertEqual(150, self.iris.n_rows())

    def test_number_of_columns(self):
        self.assertEqual(5, self.iris.n_columns())

    def test_if_self_categorical_cols_is_zero_iris(self):
        # categorical_cols is read as an attribute, not called.
        self.assertEqual([], self.iris.categorical_cols)

    def test_number_of_categorical_vars(self):
        self.assertEqual(0, self.iris.n_categorical())

    def test_number_of_numerical_vars(self):
        self.assertEqual(4, self.iris.n_numerical())

    # --- class counts and prediction type ---

    def test_total_nclasses(self):
        self.assertEqual(3, self.iris.n_classes())

    def test_total_nclasses_in_regression_problem(self):
        # The regression fixture is expected to report NaN classes.
        self.assertTrue(math.isnan(self.tips.n_classes()))

    def test_prediction_type_classification(self):
        self.assertEqual('classification', self.iris.prediction_type)

    def test_prediction_type_regression(self):
        self.assertEqual('regression', self.tips.prediction_type)

    # --- absolute correlation with the dependent column ---

    def test_max_corr_with_dependent_classification(self):
        # The classification fixture is expected to yield NaN here.
        self.assertTrue(math.isnan(self.iris.corr_with_dependent_abs_max()))

    def test_max_corr_with_dependent_regression(self):
        self.assertAlmostEqual(
            0.675, self.tips.corr_with_dependent_abs_max(), places=2)

    def test_min_corr_with_dependent_regression(self):
        self.assertAlmostEqual(
            0.002, self.tips.corr_with_dependent_abs_min(), places=2)

    def test_25p_corr_with_dependent_regression(self):
        self.assertAlmostEqual(
            0.055, self.tips.corr_with_dependent_abs_25p(), places=2)

    def test_mean_corr_with_dependent_regression(self):
        self.assertAlmostEqual(
            0.18, self.tips.corr_with_dependent_abs_mean(), places=2)

    def test_median_corr_with_dependent_regression(self):
        self.assertAlmostEqual(
            0.095, self.tips.corr_with_dependent_abs_median(), places=2)

    def test_75p_corr_with_dependent_regression(self):
        self.assertAlmostEqual(
            0.125, self.tips.corr_with_dependent_abs_75p(), places=2)

    def test_std_corr_with_dependent_regression(self):
        self.assertAlmostEqual(
            0.234, self.tips.corr_with_dependent_abs_std(), places=2)

    # --- class probabilities ---

    def test_class_probablity_total_suite(self):
        # Regression fixture: the class-probability minimum must be NaN.
        self.assertTrue(math.isnan(self.tips.class_prob_min()))

        # iris fixture. NOTE: min, max, mean and median are all expected to
        # be 0.33 with a std of 0, so this is not a very discriminating
        # check; another data set would serve better. It is also broader
        # than a single-unit test.
        iris = self.iris
        self.assertAlmostEqual(0.33, iris.class_prob_min(), places=2)
        self.assertAlmostEqual(0.33, iris.class_prob_max(), places=2)
        self.assertAlmostEqual(0, iris.class_prob_std(), places=2)
        self.assertAlmostEqual(0.33, iris.class_prob_mean(), places=2)
        self.assertAlmostEqual(0.33, iris.class_prob_median(), places=2)

    # --- symbols per categorical column ---

    def test_class_symbols_suite(self):
        # iris has no nominal variables, so the symbol sum must be NaN.
        self.assertTrue(math.isnan(self.iris.symbols_sum()))

        # tips fixture (again broader than a single-unit test): first the
        # per-column counts, then the summary statistics.
        per_category = self.tips._get_symbols_per_category()
        self.assertEqual(per_category['sex'], 2)
        self.assertEqual(per_category['smoker'], 2)

        tips = self.tips
        self.assertAlmostEqual(4, tips.symbols_max(), places=2)
        self.assertAlmostEqual(2, tips.symbols_min(), places=2)
        self.assertAlmostEqual(1, tips.symbols_std(), places=2)
        self.assertAlmostEqual(2.5, tips.symbols_mean(), places=2)

Beispiel #7

Datei anzeigen

Datei: tests_dataset_describe.py Projekt: rhiever/sklearn-benchmarks

class Dataset_Describe(unittest.TestCase):
    """Exercises the Dataset metafeature methods on two CSV fixtures.

    The iris data is the classification example; the tips data, with
    ``tip`` as the dependent column, is the regression example.
    """

    def setUp(self):
        """Read iris.csv and tips.csv and wrap each in a Dataset."""
        iris_frame = pd.read_csv("iris.csv")
        # Classification fixture, built with Dataset's defaults.
        self.iris = Dataset(iris_frame)

        tips_frame = pd.read_csv("tips.csv")
        # Regression fixture, with "tip" as the dependent column.
        self.tips = Dataset(tips_frame, dependent_col="tip")

    # Shape and column-type counts on the iris fixture.

    def test_number_of_rows(self):
        row_count = self.iris.n_rows()
        self.assertEqual(150, row_count)

    def test_number_of_columns(self):
        column_count = self.iris.n_columns()
        self.assertEqual(5, column_count)

    def test_if_self_categorical_cols_is_zero_iris(self):
        # categorical_cols is an attribute, so it is read rather than called.
        categorical = self.iris.categorical_cols
        self.assertEqual([], categorical)

    def test_number_of_categorical_vars(self):
        categorical_count = self.iris.n_categorical()
        self.assertEqual(0, categorical_count)

    def test_number_of_numerical_vars(self):
        numerical_count = self.iris.n_numerical()
        self.assertEqual(4, numerical_count)

    # Class counts and prediction type.

    def test_total_nclasses(self):
        class_count = self.iris.n_classes()
        self.assertEqual(3, class_count)

    def test_total_nclasses_in_regression_problem(self):
        # The regression fixture is expected to report NaN.
        class_count = self.tips.n_classes()
        self.assertTrue(math.isnan(class_count))

    def test_prediction_type_classification(self):
        kind = self.iris.prediction_type
        self.assertEqual("classification", kind)

    def test_prediction_type_regression(self):
        kind = self.tips.prediction_type
        self.assertEqual("regression", kind)

    # Absolute correlation with the dependent column.

    def test_max_corr_with_dependent_classification(self):
        # The classification fixture is expected to yield NaN.
        strongest = self.iris.corr_with_dependent_abs_max()
        self.assertTrue(math.isnan(strongest))

    def test_max_corr_with_dependent_regression(self):
        strongest = self.tips.corr_with_dependent_abs_max()
        self.assertAlmostEqual(0.675, strongest, places=2)

    def test_min_corr_with_dependent_regression(self):
        weakest = self.tips.corr_with_dependent_abs_min()
        self.assertAlmostEqual(0.002, weakest, places=2)

    def test_25p_corr_with_dependent_regression(self):
        lower_quartile = self.tips.corr_with_dependent_abs_25p()
        self.assertAlmostEqual(0.055, lower_quartile, places=2)

    def test_mean_corr_with_dependent_regression(self):
        average = self.tips.corr_with_dependent_abs_mean()
        self.assertAlmostEqual(0.18, average, places=2)

    def test_median_corr_with_dependent_regression(self):
        middle = self.tips.corr_with_dependent_abs_median()
        self.assertAlmostEqual(0.095, middle, places=2)

    def test_75p_corr_with_dependent_regression(self):
        upper_quartile = self.tips.corr_with_dependent_abs_75p()
        self.assertAlmostEqual(0.125, upper_quartile, places=2)

    def test_std_corr_with_dependent_regression(self):
        spread = self.tips.corr_with_dependent_abs_std()
        self.assertAlmostEqual(0.234, spread, places=2)

    # Class probabilities.

    def test_class_probablity_total_suite(self):
        # Regression fixture first: the minimum must be NaN.
        regression_min = self.tips.class_prob_min()
        self.assertTrue(math.isnan(regression_min))

        # Then the iris fixture. NOTE: every location statistic is expected
        # to be 0.33 and the std 0, so this check is weak; a different data
        # set would be more useful. It is also not a true unit test.
        self.assertAlmostEqual(0.33, self.iris.class_prob_min(), places=2)
        self.assertAlmostEqual(0.33, self.iris.class_prob_max(), places=2)
        self.assertAlmostEqual(0, self.iris.class_prob_std(), places=2)
        self.assertAlmostEqual(0.33, self.iris.class_prob_mean(), places=2)
        self.assertAlmostEqual(0.33, self.iris.class_prob_median(), places=2)

    # Symbols per categorical column.

    def test_class_symbols_suite(self):
        # Expected summary statistics for the tips fixture.
        expected_max = 4
        expected_min = 2
        expected_std = 1
        expected_mean = 2.5

        # iris has no nominal variables, so the symbol sum must be NaN.
        iris_sum = self.iris.symbols_sum()
        self.assertTrue(math.isnan(iris_sum))

        # tips fixture (also not a true unit test): per-column counts first.
        per_category = self.tips._get_symbols_per_category()
        self.assertEqual(per_category["sex"], 2)
        self.assertEqual(per_category["smoker"], 2)

        self.assertAlmostEqual(expected_max, self.tips.symbols_max(), places=2)
        self.assertAlmostEqual(expected_min, self.tips.symbols_min(), places=2)
        self.assertAlmostEqual(expected_std, self.tips.symbols_std(), places=2)
        self.assertAlmostEqual(expected_mean, self.tips.symbols_mean(), places=2)