def core_model_test(base_dir, mapping_table, taxa_level, output_dir):
    """Tests the core model and writes the results to output_dir

    Inputs:
        base_dir: base common directory of all mapping files
        mapping_table: dictionary with the mapping table information
        taxa_level: forwarded unchanged to get_profiles_list
        output_dir: output directory

    Writes two files inside output_dir:
        core_model_profile.txt: the profile returned by bootstrap_profiles
        core_model_result.txt: the model classification followed by the
            mean, standard deviation and confidence interval (as percentages)
    """
    profiles = get_profiles_list(base_dir, mapping_table, taxa_level)
    # Bootstrap profiles to get the results
    profile, shared_mean, shared_stdev, ci = bootstrap_profiles(
        normalize_profiles(profiles))
    # Write the bootstrapped profile
    profile_fp = join(output_dir, 'core_model_profile.txt')
    write_profile(profile, profile_fp)
    # Write the test result. The context manager guarantees the file is
    # flushed and closed, even if one of the writes raises.
    output_fp = join(output_dir, 'core_model_result.txt')
    with open(output_fp, 'w') as outf:
        outf.write("Results for the core model test:\n")
        outf.write("Microbiome model: ")
        # Classify the model by the 'not_shared' entry of the profile
        if profile['not_shared'] < 0.5:
            outf.write("Substantial core.\n")
        elif profile['not_shared'] < 1.0:
            outf.write("Minimal core.\n")
        else:
            outf.write('No core\n')
        outf.write("\nStatistical results (amount shared):\n")
        # The statistics are fractions; report them as percentages
        outf.write("Mean: %f %%\n" % (shared_mean * 100))
        outf.write("Standard deviation: %f %%\n" % (shared_stdev * 100))
        outf.write("Confidence interval for the mean: [%f %%, %f %%]\n"
                   % ((ci[0] * 100), (ci[1] * 100)))
def core_model_test(base_dir, mapping_table, taxa_level, output_dir):
    """Tests the core model and writes the results to output_dir

    Inputs:
        base_dir: base common directory of all mapping files
        mapping_table: dictionary with the mapping table information
        taxa_level: forwarded unchanged to get_profiles_list
        output_dir: output directory

    Writes two files inside output_dir:
        core_model_profile.txt: the profile returned by bootstrap_profiles
        core_model_result.txt: the model classification followed by the
            mean, standard deviation and confidence interval (as percentages)
    """
    profiles = get_profiles_list(base_dir, mapping_table, taxa_level)
    # Bootstrap profiles to get the results
    profile, shared_mean, shared_stdev, ci = bootstrap_profiles(
        normalize_profiles(profiles))
    # Write the bootstrapped profile
    profile_fp = join(output_dir, 'core_model_profile.txt')
    write_profile(profile, profile_fp)
    # Write the test result. The context manager guarantees the file is
    # flushed and closed, even if one of the writes raises.
    output_fp = join(output_dir, 'core_model_result.txt')
    with open(output_fp, 'w') as outf:
        outf.write("Results for the core model test:\n")
        outf.write("Microbiome model: ")
        # Classify the model by the 'not_shared' entry of the profile
        if profile['not_shared'] < 0.5:
            outf.write("Substantial core.\n")
        elif profile['not_shared'] < 1.0:
            outf.write("Minimal core.\n")
        else:
            outf.write('No core\n')
        outf.write("\nStatistical results (amount shared):\n")
        # The statistics are fractions; report them as percentages
        outf.write("Mean: %f %%\n" % (shared_mean * 100))
        outf.write("Standard deviation: %f %%\n" % (shared_stdev * 100))
        outf.write("Confidence interval for the mean: [%f %%, %f %%]\n"
                   % ((ci[0] * 100), (ci[1] * 100)))
def test_normalize_profiles_neq_len(self):
    '''Normalizing two profiles with many different taxa'''
    expected = [
        {'taxa1': 0.20, 'taxa2': 0.00, 'taxa3': 0.30, 'taxa4': 0.00,
         'taxa5': 0.15, 'taxa6': 0.35, 'taxa7': 0.00, 'not_shared': 0.00},
        {'taxa1': 0.10, 'taxa2': 0.50, 'taxa3': 0.00, 'taxa4': 0.15,
         'taxa5': 0.05, 'taxa6': 0.00, 'taxa7': 0.20, 'not_shared': 0.00},
    ]
    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual(normalize_profiles(self.uneven_profiles), expected)
def test_normalize_profiles_eq_len(self):
    '''Normalizing three profiles of equal length'''
    expected = [
        {'taxa1': 0.20, 'taxa2': 0.30, 'taxa3': 0.15, 'taxa4': 0.35,
         'not_shared': 0.00},
        {'taxa1': 0.10, 'taxa2': 0.50, 'taxa3': 0.15, 'taxa4': 0.25,
         'not_shared': 0.00},
        {'taxa1': 0.15, 'taxa2': 0.22, 'taxa3': 0.15, 'taxa4': 0.48,
         'not_shared': 0.00},
    ]
    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual(normalize_profiles(self.many_profiles), expected)
def bootstrap_profiles(profiles, alpha=0.05, repetitions=1000,
                       randfunc=randint):
    """Performs bootstrapping over the sample 'profiles'

    Inputs:
        profiles: list of profiles
        alpha: defines the confidence interval as 1 - alpha
        repetitions: number of bootstrap iterations
        randfunc: random function used to generate the bootstrap samples;
            it is called as randfunc(0, len(profiles)) and its result is
            used directly as an index into profiles

    Returns:
        profile: the result of compare_profiles over the full profiles list
        shared: the amount shared of that profile, computed as
            1.0 - profile['not_shared']
        sample_stdev: the standard deviation of the amount shared over the
            bootstrap replicates
        ci: the bootstrap percentile interval of the amount shared, i.e. its
            alpha / 2 and 1 - alpha / 2 quantiles over the replicates

    NOTE(review): the second returned value is the full-list estimate, not
    the mean of the bootstrap replicates - confirm this is intended.
    """
    length = len(profiles)
    # The return value is not used here; presumably normalize_profiles
    # updates the profiles in place - TODO confirm
    normalize_profiles(profiles)
    boot_shared = []
    for _rep in range(repetitions):
        # Construct the bootstrap sample (drawn with replacement).
        # NOTE(review): randfunc must exclude its upper bound; a function
        # that can return `length` would index out of range.
        resample = [profiles[randfunc(0, length)] for _pick in range(length)]
        # Store only the amount shared; the replicate profile itself is
        # not needed afterwards
        boot_shared.append(1.0 - compare_profiles(resample)['not_shared'])
    # Convert data to a numpy array
    boot_shared = array(boot_shared)
    # Get the standard deviation of the shared data
    sample_stdev = std(boot_shared)
    # Compute the confidence interval for the bootstrapped data
    # using bootstrap percentile interval
    ci = quantile(boot_shared, [alpha / 2, 1 - (alpha / 2)])
    # Compute the profile of the full profiles list
    profile = compare_profiles(profiles)
    return profile, 1.0 - profile['not_shared'], sample_stdev, ci
def bootstrap_profiles(profiles, alpha=0.05, repetitions=1000,
                       randfunc=randint):
    """Performs bootstrapping over the sample 'profiles'

    Inputs:
        profiles: list of profiles
        alpha: defines the confidence interval as 1 - alpha
        repetitions: number of bootstrap iterations
        randfunc: random function used to generate the bootstrap samples;
            it is called as randfunc(0, len(profiles)) and its result is
            used directly as an index into profiles

    Returns:
        profile: the result of compare_profiles over the full profiles list
        shared: the amount shared of that profile, computed as
            1.0 - profile['not_shared']
        sample_stdev: the standard deviation of the amount shared over the
            bootstrap replicates
        ci: the bootstrap percentile interval of the amount shared, i.e. its
            alpha / 2 and 1 - alpha / 2 quantiles over the replicates

    NOTE(review): the second returned value is the full-list estimate, not
    the mean of the bootstrap replicates - confirm this is intended.
    """
    length = len(profiles)
    # The return value is not used here; presumably normalize_profiles
    # updates the profiles in place - TODO confirm
    normalize_profiles(profiles)
    boot_shared = []
    for _rep in range(repetitions):
        # Construct the bootstrap sample (drawn with replacement).
        # NOTE(review): randfunc must exclude its upper bound; a function
        # that can return `length` would index out of range.
        resample = [profiles[randfunc(0, length)] for _pick in range(length)]
        # Store only the amount shared; the replicate profile itself is
        # not needed afterwards
        boot_shared.append(1.0 - compare_profiles(resample)['not_shared'])
    # Convert data to a numpy array
    boot_shared = array(boot_shared)
    # Get the standard deviation of the shared data
    sample_stdev = std(boot_shared)
    # Compute the confidence interval for the bootstrapped data
    # using bootstrap percentile interval
    ci = quantile(boot_shared, [alpha / 2, 1 - (alpha / 2)])
    # Compute the profile of the full profiles list
    profile = compare_profiles(profiles)
    return profile, 1.0 - profile['not_shared'], sample_stdev, ci
def test_normalize_profiles_neq_len(self):
    '''Normalizing two profiles with many different taxa'''
    expected = [
        {'taxa1': 0.20, 'taxa2': 0.00, 'taxa3': 0.30, 'taxa4': 0.00,
         'taxa5': 0.15, 'taxa6': 0.35, 'taxa7': 0.00, 'not_shared': 0.00},
        {'taxa1': 0.10, 'taxa2': 0.50, 'taxa3': 0.00, 'taxa4': 0.15,
         'taxa5': 0.05, 'taxa6': 0.00, 'taxa7': 0.20, 'not_shared': 0.00},
    ]
    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual(normalize_profiles(self.uneven_profiles), expected)
def test_normalize_profiles_eq_len(self):
    '''Normalizing three profiles of equal length'''
    expected = [
        {'taxa1': 0.20, 'taxa2': 0.30, 'taxa3': 0.15, 'taxa4': 0.35,
         'not_shared': 0.00},
        {'taxa1': 0.10, 'taxa2': 0.50, 'taxa3': 0.15, 'taxa4': 0.25,
         'not_shared': 0.00},
        {'taxa1': 0.15, 'taxa2': 0.22, 'taxa3': 0.15, 'taxa4': 0.48,
         'not_shared': 0.00},
    ]
    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual(normalize_profiles(self.many_profiles), expected)