def setUp(self):
    """Prepare the shared fixture used by the tests.

    Stores a nine-row column dict on ``self.data``, the DataFrame built
    from it on ``self.df`` (with ``somedate`` passed through
    ``pd.to_datetime``), the output of ``describe(self.df)`` on
    ``self.results`` and a freshly created temporary directory path on
    ``self.test_dir``.
    """
    # Timestamps that occur more than once in the 'somedate' column.
    dec_1990 = datetime.datetime(1990, 12, 9)
    dec_1950 = datetime.datetime(1950, 12, 9)

    # Columns are inserted in the same order as the DataFrame expects them.
    fixture = {}
    # Letters 'b' through 'j'.
    fixture['id'] = [chr(ord('a') + offset) for offset in range(1, 10)]
    fixture['x'] = [50, 50, -10, 0, 0, 5, 15, -3, None]
    fixture['y'] = [
        0.000001, 654.152, None, 15.984512, 3122,
        -3.1415926535, 111, 15.9, 13.5,
    ]
    fixture['cat'] = [
        'a',
        'long text value',
        u'Élysée',
        '',
        None,
        'some <b> B.s </div> </div> HTML stuff',
        'c',
        'c',
        'c',
    ]
    fixture['s1'] = np.ones(9)
    fixture['s2'] = [u'some constant text $ % value {obj} '] * 9
    fixture['somedate'] = [
        datetime.date(2011, 7, 4),
        datetime.datetime(2022, 1, 1, 13, 57),
        dec_1990,
        None,
        dec_1990,
        dec_1950,
        datetime.datetime(1898, 1, 2),
        dec_1950,
        dec_1950,
    ]

    self.data = fixture
    self.df = pd.DataFrame(self.data)
    self.df['somedate'] = pd.to_datetime(self.df['somedate'])
    self.results = describe(self.df)
    # NOTE(review): the directory is only created here; removal, if any,
    # must happen elsewhere (e.g. a tearDown not shown in this block).
    self.test_dir = tempfile.mkdtemp()
def test_recoding_reject(self):
    """Check the ``describe`` output for two columns with matching value patterns.

    Builds an eight-row frame whose ``x`` and ``y`` columns repeat their
    values in the same positions, then asserts that ``x`` is reported with
    type ``'RECODED'`` and ``correlation_var`` ``'y'``, and that the
    summary table matches the expected counts.
    """
    french = ['chien'] * 4 + ['chat'] * 2 + ['chameaux'] * 2
    english = ['dog'] * 4 + ['cat'] * 2 + ['camel'] * 2

    self.data = {'x': french, 'y': english}
    self.df = pd.DataFrame(self.data)
    self.results = describe(self.df)

    x_summary = self.results['variables'].loc['x']
    self.assertEqual(x_summary['type'], 'RECODED')
    self.assertEqual(x_summary['correlation_var'], 'y')

    expected_results = {
        'total_missing': 0.0,
        'UNIQUE': 0,
        'CONST': 0,
        'nvar': 2,
        'REJECTED': 1,
        'n': 8,
        'RECODED': 1,
        'CORR': 0,
        'DATE': 0,
        'NUM': 0,
        'CAT': 1,
        'n_duplicates': 5,
    }
    for key, expected in expected_results.items():
        self.assertEqual(self.results['table'][key], expected)
def setUp(self):
    """Create the fixture data, its description and a temp directory.

    Populates ``self.data`` (column dict), ``self.df`` (DataFrame with
    ``somedate`` converted via ``pd.to_datetime``), ``self.results``
    (return value of ``describe(self.df)``) and ``self.test_dir`` (path
    returned by ``tempfile.mkdtemp()``).
    """
    row_count = 9

    # One local per column keeps the final dict literal short.
    ids = [chr(97 + step) for step in range(1, row_count + 1)]
    x_values = [50, 50, -10, 0, 0, 5, 15, -3, None]
    y_values = [0.000001, 654.152, None, 15.984512, 3122,
                -3.1415926535, 111, 15.9, 13.5]
    categories = ['a', 'long text value', u'Élysée', '', None,
                  'some <b> B.s </div> </div> HTML stuff', 'c', 'c', 'c']
    constant_text = [u'some constant text $ % value {obj} '
                     for _ in range(row_count)]

    repeated_1990 = datetime.datetime(1990, 12, 9)
    repeated_1950 = datetime.datetime(1950, 12, 9)
    dates = [datetime.date(2011, 7, 4),
             datetime.datetime(2022, 1, 1, 13, 57),
             repeated_1990,
             None,
             repeated_1990,
             repeated_1950,
             datetime.datetime(1898, 1, 2),
             repeated_1950,
             repeated_1950]

    self.data = {
        'id': ids,
        'x': x_values,
        'y': y_values,
        'cat': categories,
        's1': np.ones(row_count),
        's2': constant_text,
        'somedate': dates,
    }

    frame = pd.DataFrame(self.data)
    self.df = frame
    self.df['somedate'] = pd.to_datetime(self.df['somedate'])
    self.results = describe(self.df)
    self.test_dir = tempfile.mkdtemp()
def __init__(self, df):
    """Render ``df`` to HTML and keep the result on ``self.html``.

    Args:
        df: object passed to ``describe`` and whose ``head()`` is passed
            to ``to_html`` (presumably a pandas DataFrame; verify against
            callers).
    """
    # describe() runs before head() is taken, as in the original ordering.
    summary = describe(df)
    preview = df.head()
    self.html = to_html(preview, summary)
def test_bins(self):
    """Recompute ``self.results`` with ``bins=100`` and rerun ``test_describe_df``."""
    bin_count = 100
    self.results = describe(self.df, bins=bin_count)
    # Reuse the assertions of the sibling test against the new results.
    self.test_describe_df()