def test_datamation_groupby_multiple():
    """Check multi-column groupby aggregations on a DatamationFrame.

    For each aggregation the test verifies that the operation names are
    recorded, that two states are tracked, that the first state equals the
    source frame, and that the aggregated values match the expected numbers.
    """
    df = small_salary().df
    df = DatamationFrame(df)

    # Group by Degree, Work
    mean = df.groupby(['Degree', 'Work']).mean()

    assert "groupby" in mean.operations
    assert "mean" in mean.operations

    assert len(mean.states) == 2
    assert df.equals(mean.states[0])

    # Aggregates are computed floats: compare with a tolerance (as the
    # penguin checks below already do) instead of bit-exact equality.
    assert mean.Salary.Masters.Academia == approx(84.0298831968801)
    assert mean.Salary.Masters.Industry == approx(91.22576155606282)
    assert mean.Salary.PhD.Academia == approx(85.55796571969728)
    assert mean.Salary.PhD.Industry == approx(93.08335885824636)

    # sum (named `total` so the builtin `sum` is not shadowed)
    total = df.groupby(['Degree', 'Work']).sum()

    assert "groupby" in total.operations
    assert "sum" in total.operations

    assert len(total.states) == 2
    assert df.equals(total.states[0])

    assert total.Salary.Masters.Academia == approx(840.2988319688011)
    assert total.Salary.Masters.Industry == approx(5655.997216475895)
    assert total.Salary.PhD.Academia == approx(1540.043382954551)
    assert total.Salary.PhD.Industry == approx(930.8335885824636)

    # product
    product = df.groupby(['Degree', 'Work']).prod()

    assert "groupby" in product.operations
    assert "product" in product.operations

    assert len(product.states) == 2
    assert df.equals(product.states[0])

    assert product.Salary.Masters.Academia == approx(1.753532557780977e+19)
    assert product.Salary.Masters.Industry == approx(3.3602152421057308e+121)
    assert product.Salary.PhD.Academia == approx(6.027761935702164e+34)
    assert product.Salary.PhD.Industry == approx(4.8818435443657834e+19)

    # Group by species, island, sex
    df = DatamationFrame(load_penguins())
    mean = df.groupby(['species', 'island', 'sex']).mean()

    assert "groupby" in mean.operations
    assert "mean" in mean.operations

    assert len(mean.states) == 2
    assert df.equals(mean.states[0])

    assert mean.bill_length_mm.Adelie.Biscoe.male == approx(40.5909090909091)
    assert mean.bill_length_mm.Adelie.Biscoe.female == approx(
        37.35909090909092)
def test_datamation_frame_groupby():
    """Grouping a DatamationFrame records 'groupby' and keeps the source frame.

    The first tracked state of the grouped object must equal the frame the
    grouping was called on.
    """
    frame = DatamationFrame(small_salary().df)
    by_work = frame.groupby('Work')

    recorded_operations = by_work.operations
    assert 'groupby' in recorded_operations

    initial_state = by_work.states[0]
    assert frame.equals(initial_state)
def test_small_salary(capsys):
    """Print the per-Work mean of the small salary data and inspect stdout.

    The captured output must mention the grouping column, the value column,
    both group labels and the two formatted mean values.
    """
    salaries = small_salary().df
    print(salaries.groupby('Work').mean())

    printed = capsys.readouterr().out

    expected_fragments = (
        "Work",
        "Salary",
        "Academia",
        "Industry",
        "85.012222",
        "91.483761",
    )
    for fragment in expected_fragments:
        assert fragment in printed
def test_datamation_frame_datamation_sanddance():
    """Check the object returned by ``datamation_sanddance()``.

    After ``groupby('Work').mean()`` the datamation must expose two states
    (the source frame, then a DatamationGroupBy), the two operation names in
    call order, a DatamationFrame output holding the group means, and a
    string form that mentions the 'Salary' column.
    """
    df = small_salary().df
    df = DatamationFrame(df)

    datamation = df.groupby('Work').mean().datamation_sanddance()

    assert len(datamation.states) == 2
    assert len(datamation.operations) == 2

    assert df.equals(datamation.states[0])
    assert isinstance(datamation.states[1], DatamationGroupBy)

    assert datamation.operations[0] == 'groupby'
    assert datamation.operations[1] == 'mean'

    assert isinstance(datamation.output, DatamationFrame)

    # The means are computed floats: compare with a tolerance rather than
    # bit-exact equality, matching the approx-based checks used elsewhere
    # in this module.
    assert datamation.output.Salary.Academia == approx(85.01222196154829)
    assert datamation.output.Salary.Industry == approx(91.48376118136609)

    assert 'Salary' in str(datamation)
def _assert_specs_match(specs, relative_path):
    """Compare generated specs with the reference file at *relative_path*.

    Args:
        specs: The specs object produced by a ``.specs()`` call.
        relative_path: Path of the reference file, relative to the directory
            that contains this test module.

    The actual comparison is delegated to ``compare_specs_with_file``.
    """
    spec_path = os.path.join(os.path.dirname(__file__), relative_path)
    # Pin the encoding so reading the reference file does not depend on the
    # platform's default locale encoding.
    with open(spec_path, 'r', encoding='utf-8') as specs_file:
        compare_specs_with_file(specs, specs_file)


def test_datamation_frame_specs():
    """Compare generated specs with the stored reference files.

    Covers mean, min, max, sum, product, count and quantile, each grouped by
    one column and by two columns.
    """
    df = small_salary().df
    df = DatamationFrame(df)

    # Mean
    # Group by Degree
    _assert_specs_match(
        df.groupby('Degree').mean().specs(),
        '../../inst/specs/raw_spec.json')

    # Group by Work
    _assert_specs_match(
        df.groupby('Work').mean().specs(),
        '../../inst/specs/groupby_work.json')

    # Group by Degree, Work
    _assert_specs_match(
        df.groupby(['Degree', 'Work']).mean().specs(),
        '../../inst/specs/groupby_degree_work.json')

    # Group by Work, Degree
    _assert_specs_match(
        df.groupby(['Work', 'Degree']).mean().specs(),
        '../../inst/specs/groupby_work_degree.json')

    # Min
    # Group by Degree Min
    _assert_specs_match(
        df.groupby('Degree').min('Salary').specs(),
        '../../sandbox/custom_animations/custom-animations-min-R.json')

    # Group by Degree, Work Min
    _assert_specs_match(
        df.groupby(['Degree', 'Work']).min('Salary').specs(),
        '../../inst/specs/min_specs_two_columns.json')

    # Max
    # Group by Degree Max
    _assert_specs_match(
        df.groupby('Degree').max('Salary').specs(),
        '../../sandbox/custom_animations/custom-animations-max-R.json')

    # Group by Degree, Work Max
    _assert_specs_match(
        df.groupby(['Degree', 'Work']).max('Salary').specs(),
        '../../inst/specs/max_specs_two_columns.json')

    # Sum
    # Sum of Group by Degree
    _assert_specs_match(
        df.groupby('Degree').sum().specs(),
        '../../inst/specs/sum_specs.json')

    # Sum of Group by Degree, Work
    _assert_specs_match(
        df.groupby(['Degree', 'Work']).sum().specs(),
        '../../inst/specs/sum_specs_two_columns.json')

    # Product
    # Product of Group by Degree
    _assert_specs_match(
        df.groupby('Degree').prod().specs(),
        '../../inst/specs/prod_specs.json')

    # Product of Group by Degree, Work
    _assert_specs_match(
        df.groupby(['Degree', 'Work']).prod().specs(),
        '../../inst/specs/prod_specs_two_columns.json')

    # Count group by Degree
    _assert_specs_match(
        df.groupby('Degree').count('Salary').specs(),
        '../../inst/specs/count_specs_one_column.json')

    # Count group by Degree, Work
    _assert_specs_match(
        df.groupby(['Degree', 'Work']).count('Salary').specs(),
        '../../inst/specs/count_specs_two_columns.json')

    # Quantile group by Degree
    _assert_specs_match(
        df.groupby('Degree').quantile('Salary', 0.01).specs(),
        '../../sandbox/custom_animations/custom-animations-quantile-R.json')

    # Quantile group by Degree, Work
    _assert_specs_match(
        df.groupby(['Degree', 'Work']).quantile('Salary', 0.01).specs(),
        '../../inst/specs/quantile_specs_two_columns.json')
def test_datamation_groupby():
    """Check single-column groupby aggregations on a DatamationFrame.

    For mean, median, sum and product grouped by 'Degree', and for mean
    grouped by 'Work', the test verifies that the operation names are
    recorded, that two states are tracked, that the first state equals the
    source frame, and that the aggregated values match the expected numbers.
    """
    df = small_salary().df
    df = DatamationFrame(df)

    # Group by Degree
    mean = df.groupby('Degree').mean()

    assert "groupby" in mean.operations
    assert "mean" in mean.operations

    assert len(mean.states) == 2
    assert df.equals(mean.states[0])

    # Aggregates are computed floats: compare with a tolerance rather than
    # bit-exact equality, matching the approx-based checks used elsewhere
    # in this module.
    assert mean.Salary.Masters == approx(90.22633400617633)
    assert mean.Salary.PhD == approx(88.24560612632195)

    # median
    median = df.groupby('Degree').median()

    assert "groupby" in median.operations
    assert "median" in median.operations

    assert len(median.states) == 2
    assert df.equals(median.states[0])

    assert median.Salary.Masters == approx(91.13211765489541)
    assert median.Salary.PhD == approx(86.40630845562555)

    # sum (named `total` so the builtin `sum` is not shadowed)
    total = df.groupby('Degree').sum()

    assert "groupby" in total.operations
    assert "sum" in total.operations

    assert len(total.states) == 2
    assert df.equals(total.states[0])

    assert total.Salary.Masters == approx(6496.296048444696)
    assert total.Salary.PhD == approx(2470.8769715370145)

    # product
    product = df.groupby('Degree').prod()

    assert "groupby" in product.operations
    assert "product" in product.operations

    assert len(product.states) == 2
    assert df.equals(product.states[0])

    assert product.Salary.PhD == approx(2.9426590692781414e+54)
    assert product.Salary.Masters == approx(5.892246828184284e+140)

    # Group by Work
    mean = df.groupby('Work').mean()

    assert "groupby" in mean.operations
    assert "mean" in mean.operations

    assert len(mean.states) == 2
    assert df.equals(mean.states[0])

    assert mean.Salary.Academia == approx(85.01222196154829)
    assert mean.Salary.Industry == approx(91.48376118136609)