"""Parameter estimates of beak depths Estimate the difference of the mean beak depth of the G. scandens samples from 1975 and 2012 and report a 95% confidence interval.""" import pandas as pd import numpy as np from customlib import finch_beaks_df as finch from customlib import bootstrap_repl as bt bd_1975, bd_2012, bl_1975, bl_2012 = finch.draw_finch_data() # Compute the difference of the sample means: mean_diff mean_diff = np.mean(bd_2012) - np.mean(bd_1975) # Get bootstrap replicates of means bs_replicates_1975 = bt.draw_bs_reps(bd_1975,np.mean,10000) bs_replicates_2012 = bt.draw_bs_reps(bd_2012,np.mean,10000) # Compute samples of difference of means: bs_diff_replicates bs_diff_replicates = bs_replicates_2012 - bs_replicates_1975 # Compute 95% confidence interval: conf_int conf_int = np.percentile(bs_diff_replicates,[2.5, 97.5]) # Print the results print('difference of means =', mean_diff, 'mm') print('95% confidence interval =', conf_int, 'mm')
"""Bootstrap hypothesis test on alive-sperm counts: control vs. pesticide.

Null hypothesis: both groups share the same mean. Each group is shifted to
the pooled mean, bootstrap replicates of the difference of means
(control minus treated) are drawn, and the p-value is the fraction of
replicates at least as large as the observed difference.
"""
import numpy as np
import pandas as pd

from customlib import bootstrap_repl as bt

# NOTE(review): absolute, machine-specific path -- the script only runs on a
# machine that has the data set at exactly this location.
DATA_PATH = (
    'C:/Users/amlan/Documents/Git Repos/Machine Learning/'
    'Neural-Networks-DataCamp/customlib/datasets/bee_sperm.csv'
)
# Number of bootstrap replicates drawn per group.
N_REPLICATES = 10000

# The first three lines of the file are skipped before parsing
# (presumably a free-text preamble -- verify against the data file).
df = pd.read_csv(DATA_PATH, skiprows=3)

# Split the 'AliveSperm' column by treatment group with one .loc lookup per
# group instead of chained indexing.
control = df.loc[df['Treatment'] == 'Control', 'AliveSperm']
treated = df.loc[df['Treatment'] == 'Pesticide', 'AliveSperm']

# Compute the difference in mean sperm count: diff_means
diff_means = np.mean(control) - np.mean(treated)

# Compute mean of pooled data: mean_count
sperm_concat = np.concatenate((control, treated))
mean_count = np.mean(sperm_concat)

# Generate shifted data sets: both groups now have mean == mean_count, which
# is what the null hypothesis asserts.
control_shifted = control - np.mean(control) + mean_count
treated_shifted = treated - np.mean(treated) + mean_count

# Generate bootstrap replicates of each shifted group's mean.
bs_reps_control = bt.draw_bs_reps(control_shifted, np.mean, size=N_REPLICATES)
bs_reps_treated = bt.draw_bs_reps(treated_shifted, np.mean, size=N_REPLICATES)

# Get replicates of difference of means: bs_replicates
bs_replicates = bs_reps_control - bs_reps_treated

# Compute and print p-value: p
# The observed statistic computed above is reused here rather than being
# recomputed inline, so the two can never drift apart.
p = np.sum(bs_replicates >= diff_means) / len(bs_replicates)
print('p-value =', p)
import numpy as np

from customlib import bootstrap_repl as bt

# NOTE(review): force_a, force_b and forces_concat are not defined in this
# script; they must already be in scope when it runs. forces_concat is
# presumably the concatenation of force_a and force_b -- confirm.

# Observed statistic: mean of A minus mean of B.
empirical_diff_means = np.mean(force_a) - np.mean(force_b)

# Mean of all forces; both samples are shifted onto it so that they share a
# common mean.
mean_force = np.mean(forces_concat)
force_a_shifted = force_a - np.mean(force_a) + mean_force
force_b_shifted = force_b - np.mean(force_b) + mean_force

# 10,000 bootstrap replicates of the mean from each shifted array
# (A is drawn first, then B).
bs_replicates_a = bt.draw_bs_reps(force_a_shifted, np.mean, 10000)
bs_replicates_b = bt.draw_bs_reps(force_b_shifted, np.mean, 10000)

# Replicates of the difference of means.
bs_replicates = bs_replicates_a - bs_replicates_b

# p-value: fraction of replicates at least as large as the observed
# difference.
n_at_least_observed = np.sum(bs_replicates >= empirical_diff_means)
p = n_at_least_observed / len(bs_replicates)
print('p-value =', p)
# NOTE(review): np, bt and plt are used below but are not imported in this
# script; they must already be in scope when it runs.

# Annual rainfall measurements (the axis label below gives the variance in
# sq. cm after dividing by 100, i.e. the raw values are taken to be in mm).
rainfall = np.array([
    875.5, 648.2, 788.1, 940.3, 491.1, 743.5, 730.1, 686.5, 878.8, 865.6,
    654.9, 831.5, 798.1, 681.8, 743.8, 689.1, 752.1, 837.2, 710.6, 749.2,
    967.1, 701.2, 619., 747.6, 803.4, 645.6, 804.1, 787.4, 646.8, 997.1,
    774., 734.5, 835., 840.7, 659.6, 828.3, 909.7, 856.9, 578.3, 904.2,
    883.9, 740.1, 773.9, 741.4, 866.8, 871.1, 712.5, 919.2, 927.9, 809.4,
    633.8, 626.8, 871.3, 774.3, 898.8, 789.6, 936.3, 765.4, 882.1, 681.1,
    661.3, 847.9, 683.9, 985.7, 771.1, 736.6, 713.2, 774.5, 937.7, 694.5,
    598.2, 983.8, 700.2, 901.3, 733.5, 964.4, 609.3, 1035.2, 718., 688.6,
    736.8, 643.3, 1038.5, 969., 802.7, 876.6, 944.7, 786.6, 770.4, 808.6,
    761.3, 774.2, 559.3, 674.2, 883.6, 823.9, 960.4, 877.8, 940.6, 831.8,
    906.2, 866.5, 674.1, 998.1, 789.3, 915., 737.1, 763., 666.7, 824.5,
    913.8, 905.1, 667.8, 747.4, 784.7, 925.4, 880.2, 1086.9, 764.4, 1050.1,
    595.2, 855.2, 726.9, 785.2, 948.8, 970.6, 896., 618.4, 572.4, 1146.4,
    728.2, 864.2, 793.
])

# 10,000 bootstrap replicates of the variance, divided by 100 to express
# them in square centimeters.
bs_replicates = bt.draw_bs_reps(rainfall, np.var, 10000) / 100

# Normalised 50-bin histogram of the replicates.
_ = plt.hist(bs_replicates, bins=50, density=True)
_ = plt.xlabel('variance of annual rainfall (sq. cm)')
_ = plt.ylabel('PDF')

# Display the figure.
plt.show()
# NOTE(review): this fragment opens partway through a numeric array literal;
# the start of the expression (assignment target, constructor call and any
# earlier values) is not visible here. The statements below read the result
# as `rainfall` -- confirm against the complete script.
654.9, 831.5, 798.1, 681.8, 743.8, 689.1, 752.1, 837.2, 710.6, 749.2,
967.1, 701.2, 619., 747.6, 803.4, 645.6, 804.1, 787.4, 646.8, 997.1,
774., 734.5, 835., 840.7, 659.6, 828.3, 909.7, 856.9, 578.3, 904.2,
883.9, 740.1, 773.9, 741.4, 866.8, 871.1, 712.5, 919.2, 927.9, 809.4,
633.8, 626.8, 871.3, 774.3, 898.8, 789.6, 936.3, 765.4, 882.1, 681.1,
661.3, 847.9, 683.9, 985.7, 771.1, 736.6, 713.2, 774.5, 937.7, 694.5,
598.2, 983.8, 700.2, 901.3, 733.5, 964.4, 609.3, 1035.2, 718., 688.6,
736.8, 643.3, 1038.5, 969., 802.7, 876.6, 944.7, 786.6, 770.4, 808.6,
761.3, 774.2, 559.3, 674.2, 883.6, 823.9, 960.4, 877.8, 940.6, 831.8,
906.2, 866.5, 674.1, 998.1, 789.3, 915., 737.1, 763., 666.7, 824.5,
913.8, 905.1, 667.8, 747.4, 784.7, 925.4, 880.2, 1086.9, 764.4, 1050.1,
595.2, 855.2, 726.9, 785.2, 948.8, 970.6, 896., 618.4, 572.4, 1146.4,
728.2, 864.2, 793.
])

# Take 10,000 bootstrap replicates of the mean: bs_replicates
# NOTE(review): bt, np and plt are not imported in the visible part of this
# script; they must already be in scope.
bs_replicates = bt.draw_bs_reps(rainfall, np.mean, 10000)

# Compute and print SEM: np.std (called with its default arguments) divided
# by the square root of the number of observations.
sem = np.std(rainfall) / np.sqrt(len(rainfall))
print(sem)

# Compute and print standard deviation of bootstrap replicates; it is printed
# directly after the SEM so the two numbers can be compared.
bs_std = np.std(bs_replicates)
print(bs_std)

# Make a histogram of the results (50 bins, normalised via density=True)
_ = plt.hist(bs_replicates, bins=50, density=True)
_ = plt.xlabel('mean annual rainfall (mm)')
_ = plt.ylabel('PDF')

# Show the plot
# NOTE(review): no call follows this comment in the visible text (for
# example, no plt.show()); the script may be truncated here -- confirm.
"""For that we could use a permutation test. The hypothesis is that the means
are equal. To perform this hypothesis test, we need to shift the two data sets
so that they have the same mean and then use bootstrap sampling to compute the
difference of means."""
# NOTE(review): the beginning of the docstring above (including its opening
# delimiter) was missing from the file; only the surviving text is kept.
import numpy as np
import pandas as pd

from customlib import bootstrap_repl as bt
from customlib import finch_beaks_df as finch

# Number of bootstrap replicates drawn per sample.
N_REPLICATES = 10000

# Four samples are returned; only the two bd_* samples are analysed here,
# the bl_* samples are unpacked but not used in this script.
bd_1975, bd_2012, bl_1975, bl_2012 = finch.draw_finch_data()

# Observed difference of means (2012 minus 1975), computed from the unshifted
# samples. This used to be a pasted literal (0.22622047244094645), which
# would silently go stale if the underlying data ever changed.
mean_diff = np.mean(bd_2012) - np.mean(bd_1975)

# Compute mean of combined data set: combined_mean
combined_mean = np.mean(np.concatenate((bd_1975, bd_2012)))

# Shift the samples so both have mean == combined_mean, which is what the
# null hypothesis (equal means) asserts.
bd_1975_shifted = bd_1975 - np.mean(bd_1975) + combined_mean
bd_2012_shifted = bd_2012 - np.mean(bd_2012) + combined_mean

# Get bootstrap replicates of shifted data sets
bs_replicates_1975 = bt.draw_bs_reps(bd_1975_shifted, np.mean, N_REPLICATES)
bs_replicates_2012 = bt.draw_bs_reps(bd_2012_shifted, np.mean, N_REPLICATES)

# Compute replicates of difference of means: bs_diff_replicates
bs_diff_replicates = bs_replicates_2012 - bs_replicates_1975

# Compute the p-value: fraction of replicates at least as large as the
# observed difference.
p = np.sum(bs_diff_replicates >= mean_diff) / len(bs_diff_replicates)

# Print p-value
print('p =', p)
"""One-sample bootstrap test on the mean of force_b.

force_b is translated so that its mean equals a fixed reference value, the
mean is bootstrapped from the translated data, and the p-value is the
fraction of replicates that are at most the observed mean of force_b.
"""
import numpy as np

from customlib import bootstrap_repl as bt

# NOTE(review): force_b is not defined in this script; it must already be in
# scope when it runs.

# Reference value the translated sample's mean is set to.
REFERENCE_MEAN = 0.55
# Number of bootstrap replicates to draw.
N_REPLICATES = 10000

# Observed mean, computed once and reused for both the shift and the test.
observed_mean_b = np.mean(force_b)

# Make an array of translated impact forces: translated_force_b
translated_force_b = force_b - observed_mean_b + REFERENCE_MEAN

# Take bootstrap replicates of Frog B's translated impact forces: bs_replicates
bs_replicates = bt.draw_bs_reps(translated_force_b, np.mean, N_REPLICATES)

# Compute fraction of replicates that are less than or equal to the observed
# Frog B force: p
# The divisor is the actual number of replicates rather than a second
# hard-coded 10000, so it cannot fall out of sync with the draw above.
p = np.sum(bs_replicates <= observed_mean_b) / len(bs_replicates)

# Print the p-value
print('p = ', p)