예제 #1
0
    def setUp(self):
        """Prepare the fixture used by the tests.

        Builds a nine-row data set with an identifier column, two numeric
        columns, a text column, two constant columns and a date column
        (several of them containing ``None``), converts the date column
        with ``pd.to_datetime``, stores the ``describe`` output on
        ``self.results`` and creates a temporary directory whose path is
        kept in ``self.test_dir``.
        """
        n_rows = 9
        dec_1950 = datetime.datetime(1950, 12, 9)
        dec_1990 = datetime.datetime(1990, 12, 9)

        # Letters 'b' through 'j': one identifier per row.
        ids = [chr(ord('a') + shift) for shift in range(1, n_rows + 1)]
        x_values = [50, 50, -10, 0, 0, 5, 15, -3, None]
        y_values = [
            0.000001, 654.152, None, 15.984512, 3122, -3.1415926535, 111,
            15.9, 13.5
        ]
        categories = [
            'a', 'long text value', u'Élysée', '', None,
            'some <b> B.s </div> </div> HTML stuff', 'c', 'c', 'c'
        ]
        # Mixes a plain date, datetimes and a missing value on purpose.
        dates = [
            datetime.date(2011, 7, 4),
            datetime.datetime(2022, 1, 1, 13, 57),
            dec_1990, None,
            dec_1990,
            dec_1950,
            datetime.datetime(1898, 1, 2),
            dec_1950,
            dec_1950
        ]

        self.data = {
            'id': ids,
            'x': x_values,
            'y': y_values,
            'cat': categories,
            's1': np.ones(n_rows),
            's2': [u'some constant text $ % value {obj} '] * n_rows,
            'somedate': dates
        }

        self.df = pd.DataFrame(self.data)
        converted_dates = pd.to_datetime(self.df['somedate'])
        self.df['somedate'] = converted_dates

        self.results = describe(self.df)
        self.test_dir = tempfile.mkdtemp()
예제 #2
0
    def test_recoding_reject(self):
        """Check the description of two columns holding parallel labels.

        ``x`` and ``y`` contain eight rows in which every ``x`` label maps
        to exactly one ``y`` label. The test expects ``x`` to be typed
        ``RECODED`` with ``y`` as its ``correlation_var`` and then checks
        the counters of the summary table.
        """
        x_labels = ['chien'] * 4 + ['chat'] * 2 + ['chameaux'] * 2
        y_labels = ['dog'] * 4 + ['cat'] * 2 + ['camel'] * 2
        self.data = {'x': x_labels, 'y': y_labels}
        self.df = pd.DataFrame(self.data)
        self.results = describe(self.df)

        # Look the row for 'x' up once and check both fields on it.
        x_summary = self.results['variables'].loc['x']
        self.assertEqual(x_summary['type'], 'RECODED')
        self.assertEqual(x_summary['correlation_var'], 'y')

        expected_table = {
            'total_missing': 0.0,
            'UNIQUE': 0,
            'CONST': 0,
            'nvar': 2,
            'REJECTED': 1,
            'n': 8,
            'RECODED': 1,
            'CORR': 0,
            'DATE': 0,
            'NUM': 0,
            'CAT': 1,
            'n_duplicates': 5
        }
        for counter, expected_value in expected_table.items():
            self.assertEqual(self.results['table'][counter], expected_value)
예제 #3
0
    def setUp(self):
        """Assemble the shared test fixture column by column.

        The resulting nine-row frame is stored on ``self.df`` (with its
        ``somedate`` column passed through ``pd.to_datetime``), the raw
        column dictionary on ``self.data``, the ``describe`` output on
        ``self.results`` and a freshly created temporary directory path on
        ``self.test_dir``.
        """
        columns = {}
        # Code points 98..106, i.e. the letters 'b' through 'j'.
        columns['id'] = list(map(chr, range(98, 107)))
        columns['x'] = [50, 50, -10, 0, 0, 5, 15, -3, None]
        columns['y'] = [
            0.000001, 654.152, None, 15.984512, 3122,
            -3.1415926535, 111, 15.9, 13.5,
        ]
        columns['cat'] = [
            'a',
            'long text value',
            u'Élysée',
            '',
            None,
            'some <b> B.s </div> </div> HTML stuff',
            'c',
            'c',
            'c',
        ]
        columns['s1'] = np.ones(9)
        columns['s2'] = [u'some constant text $ % value {obj} ' for _ in range(9)]
        # One plain date, one missing value, the rest datetimes.
        columns['somedate'] = [
            datetime.date(2011, 7, 4),
            datetime.datetime(2022, 1, 1, 13, 57),
            datetime.datetime(1990, 12, 9),
            None,
            datetime.datetime(1990, 12, 9),
            datetime.datetime(1950, 12, 9),
            datetime.datetime(1898, 1, 2),
            datetime.datetime(1950, 12, 9),
            datetime.datetime(1950, 12, 9),
        ]
        self.data = columns

        self.df = pd.DataFrame(self.data)
        self.df['somedate'] = pd.to_datetime(self.df['somedate'])

        self.results = describe(self.df)
        self.test_dir = tempfile.mkdtemp()
예제 #4
0
 def __init__(self, df):
     """Describe ``df`` and keep the rendered result on ``self.html``.

     ``describe`` is called on the full frame first; its output is then
     passed to ``to_html`` together with ``df.head()``.
     """
     summary = describe(df)
     preview = df.head()
     self.html = to_html(preview, summary)
예제 #5
0
 def test_bins(self):
     """Re-run the ``test_describe_df`` checks on a 100-bin description."""
     bin_count = 100
     self.results = describe(self.df, bins=bin_count)
     self.test_describe_df()
예제 #6
0
 def test_bins(self):
     """Describe ``self.df`` with ``bins=100`` and reuse ``test_describe_df``."""
     # Replace the stored results before delegating to the shared checks.
     binned_results = describe(self.df, bins=100)
     self.results = binned_results
     self.test_describe_df()