def test_summary_string():
    """Check the summary text reported for the string column ``F``.

    The expected text is ``"average length of string: "`` followed by the mean
    length of the stringified values in ``df['F']``, rounded to one decimal
    place. It is compared with the sixth entry of ``results['summary']``.
    """
    df = create_data()
    results = sample._summarize_data(df)

    lengths = [len(str(value)) for value in df['F']]
    expected = "average length of string: " + str(round(np.mean(lengths), 1))

    # Select by position explicitly. A bare integer key (``[5]``) on a Series
    # that is not integer-indexed only works through pandas' deprecated
    # positional fallback. NOTE(review): assumes ``results`` is indexed by the
    # column labels of ``df`` -- confirm against ``_summarize_data``.
    actual = results['summary'].iloc[5]

    assert expected == actual
def test_summary_other():
    """Check the placeholder summary for columns without a specific summary.

    The second and fifth entries of ``results['summary']`` (the datetime and
    category columns, going by the local names used here) must both read
    ``'No summary available'``.
    """
    df = create_data()
    results = sample._summarize_data(df)
    summaries = results['summary']

    # Use ``.iloc`` for positional access: plain integer keys on a Series that
    # is not integer-indexed rely on a deprecated pandas fallback.
    # NOTE(review): assumes ``results`` is indexed by column label -- confirm.
    datetime_summary = summaries.iloc[1]
    category_summary = summaries.iloc[4]

    assert datetime_summary == 'No summary available'
    assert category_summary == 'No summary available'
def test_data_type():
    """Check that the ``data_type`` column matches the input frame's dtypes."""
    frame = create_data()
    summary = sample._summarize_data(frame)

    expected_dtypes = frame.dtypes
    reported_dtypes = summary['data_type']

    assert expected_dtypes.equals(reported_dtypes)
def test_column_names():
    """Check that the summary is indexed by the input frame's column labels."""
    frame = create_data()
    summary = sample._summarize_data(frame)

    input_columns = frame.columns

    assert input_columns.equals(summary.index)
def test_sample_record():
    """Check that ``sample_record`` equals the second row of the input frame."""
    frame = create_data()
    summary = sample._summarize_data(frame)

    # Row at position 1 is the record the summary is compared against.
    second_row = frame.iloc[1]
    reported_record = summary['sample_record']

    assert second_row.equals(reported_record)
def test_summary_int32():
    """Check the summary text reported for column ``D``.

    The expected text is ``'unique values: '`` followed by the number of
    distinct values in ``df['D']``. It is compared with the fourth entry of
    ``results['summary']``.
    """
    df = create_data()
    results = sample._summarize_data(df)

    unique_count = df['D'].unique().size
    expected = 'unique values: ' + str(unique_count)

    # Positional access via ``.iloc``: a bare integer key on a Series that is
    # not integer-indexed depends on pandas' deprecated positional fallback.
    # NOTE(review): assumes ``results`` is indexed by column label -- confirm.
    actual = results['summary'].iloc[3]

    assert expected == actual
def test_summary_float32():
    """Check the summary text reported for column ``C``.

    The expected text is ``'median value: '`` followed by the string form of
    ``np.median(df['C'])``. It is compared with the third entry of
    ``results['summary']``.
    """
    df = create_data()
    results = sample._summarize_data(df)

    # Keep ``str()`` rather than an f-string so the numpy scalar is rendered
    # exactly as before.
    expected = 'median value: ' + str(np.median(df['C']))

    # Positional access via ``.iloc``: a bare integer key on a Series that is
    # not integer-indexed depends on pandas' deprecated positional fallback.
    # NOTE(review): assumes ``results`` is indexed by column label -- confirm.
    actual = results['summary'].iloc[2]

    assert expected == actual