Ejemplo n.º 1
0
def core_model_test(base_dir, mapping_table, taxa_level, output_dir):
    """ Tests the core model and writes the profile and the report to disk
    Inputs:
        base_dir: base common directory of all mapping files
        mapping_table: dictionary with the mapping table information
        taxa_level: value forwarded unchanged to get_profiles_list
            (presumably the taxonomic level of the profiles - confirm)
        output_dir: output directory

    Two files are written in output_dir: 'core_model_profile.txt' with the
    profile returned by bootstrap_profiles, and 'core_model_result.txt' with
    the model classification and the bootstrap statistics.
    """
    profiles = get_profiles_list(base_dir, mapping_table, taxa_level)
    # Bootstrap profiles to get the results
    profile, shared_mean, stdev, ci = bootstrap_profiles(
        normalize_profiles(profiles))
    # Write the bootstrapped profile
    profile_fp = join(output_dir, 'core_model_profile.txt')
    write_profile(profile, profile_fp)
    # Classify the model from the fraction that is not shared
    not_shared = profile['not_shared']
    if not_shared < 0.5:
        model = "Substantial core.\n"
    elif not_shared < 1.0:
        model = "Minimal core.\n"
    else:
        model = 'No core\n'
    # Write the test result; the context manager guarantees that the file is
    # flushed and closed even if one of the writes fails
    output_fp = join(output_dir, 'core_model_result.txt')
    with open(output_fp, 'w') as outf:
        outf.write("Results for the core model test:\n")
        outf.write("Microbiome model: ")
        outf.write(model)
        outf.write("\nStatistical results (amount shared):\n")
        outf.write("Mean: %f %%\n" % (shared_mean * 100))
        outf.write("Standard deviation: %f %%\n" % (stdev * 100))
        outf.write("Confidence interval for the mean: [%f %%, %f %%]\n"
                   % ((ci[0] * 100), (ci[1] * 100)))
Ejemplo n.º 2
0
def core_model_test(base_dir, mapping_table, taxa_level, output_dir):
    """ Tests the core model and writes the profile and the report to disk
    Inputs:
        base_dir: base common directory of all mapping files
        mapping_table: dictionary with the mapping table information
        taxa_level: value forwarded unchanged to get_profiles_list
            (presumably the taxonomic level of the profiles - confirm)
        output_dir: output directory

    Two files are written in output_dir: 'core_model_profile.txt' with the
    profile returned by bootstrap_profiles, and 'core_model_result.txt' with
    the model classification and the bootstrap statistics.
    """
    profiles = get_profiles_list(base_dir, mapping_table, taxa_level)
    # Bootstrap profiles to get the results
    profile, shared_mean, stdev, ci = bootstrap_profiles(
        normalize_profiles(profiles))
    # Write the bootstrapped profile
    profile_fp = join(output_dir, 'core_model_profile.txt')
    write_profile(profile, profile_fp)
    # Classify the model from the fraction that is not shared
    not_shared = profile['not_shared']
    if not_shared < 0.5:
        model = "Substantial core.\n"
    elif not_shared < 1.0:
        model = "Minimal core.\n"
    else:
        model = 'No core\n'
    # Write the test result; the context manager guarantees that the file is
    # flushed and closed even if one of the writes fails
    output_fp = join(output_dir, 'core_model_result.txt')
    with open(output_fp, 'w') as outf:
        outf.write("Results for the core model test:\n")
        outf.write("Microbiome model: ")
        outf.write(model)
        outf.write("\nStatistical results (amount shared):\n")
        outf.write("Mean: %f %%\n" % (shared_mean * 100))
        outf.write("Standard deviation: %f %%\n" % (stdev * 100))
        outf.write("Confidence interval for the mean: [%f %%, %f %%]\n" %
                   ((ci[0] * 100), (ci[1] * 100)))
Ejemplo n.º 3
0
 def test_normalize_profiles_neq_len(self):
     '''Normalizing two profiles with many different taxa'''
     # Every taxon seen in either profile is expected in both results,
     # with 0.00 where the profile did not have it
     expected = [{'taxa1': 0.20, 'taxa2': 0.00, 'taxa3': 0.30,
                  'taxa4': 0.00, 'taxa5': 0.15, 'taxa6': 0.35,
                  'taxa7': 0.00, 'not_shared': 0.00},
                 {'taxa1': 0.10, 'taxa2': 0.50, 'taxa3': 0.00,
                  'taxa4': 0.15, 'taxa5': 0.05, 'taxa6': 0.00,
                  'taxa7': 0.20, 'not_shared': 0.00}]
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12
     self.assertEqual(normalize_profiles(self.uneven_profiles), expected)
Ejemplo n.º 4
0
 def test_normalize_profiles_eq_len(self):
     '''Normalizing profiles of equal length'''
     # Three profiles over the same four taxa are expected back
     expected = [{'taxa1': 0.20, 'taxa2': 0.30, 'taxa3': 0.15,
                  'taxa4': 0.35, 'not_shared': 0.00},
                 {'taxa1': 0.10, 'taxa2': 0.50, 'taxa3': 0.15,
                  'taxa4': 0.25, 'not_shared': 0.00},
                 {'taxa1': 0.15, 'taxa2': 0.22, 'taxa3': 0.15,
                  'taxa4': 0.48, 'not_shared': 0.00}]
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12
     self.assertEqual(normalize_profiles(self.many_profiles), expected)
Ejemplo n.º 5
0
def bootstrap_profiles(profiles,
                       alpha=0.05,
                       repetitions=1000,
                       randfunc=randint):
    """Performs bootstrapping over the sample 'profiles'

    Inputs:
        profiles: list of profiles
        alpha: defines the confidence interval as 1 - alpha
        repetitions: number of bootstrap iterations
        randfunc: random function used to draw the bootstrap samples; it is
            called as randfunc(0, len(profiles)) and its result is used as
            an index into profiles

    Returns:
        profile: the result of compare_profiles over the whole profiles list
        shared: the amount shared in that profile, i.e.
            1.0 - profile['not_shared'] (this is computed from the full
            sample, it is not the mean of the bootstrap replicates)
        sample_stdev: the bootstrap standard deviation of the amount shared
        ci: the bootstrap percentile interval of the amount shared, taken at
            the alpha / 2 and 1 - alpha / 2 quantiles
    """
    length = len(profiles)
    # NOTE(review): the return value is discarded, so this call only matters
    # if normalize_profiles modifies 'profiles' in place - confirm
    normalize_profiles(profiles)
    boot_shared = []
    for _ in range(repetitions):
        # Construct the bootstrap sample (drawn with replacement)
        # NOTE(review): indexing is only safe if randfunc excludes the upper
        # bound, as numpy.random.randint does - confirm which randint is used
        resample = [profiles[randfunc(0, length)] for _ in range(length)]
        # Store only the amount shared; the resampled profile itself is not
        # needed afterwards, so it is not kept in memory
        boot_shared.append(1.0 - compare_profiles(resample)['not_shared'])
    # Convert data to a numpy array
    boot_shared = array(boot_shared)
    # Get the standard deviation of the shared data
    sample_stdev = std(boot_shared)
    # Compute the confidence interval for the bootstrapped data
    # using bootstrap percentile interval
    ci = quantile(boot_shared, [alpha / 2, 1 - (alpha / 2)])
    # Compute the profile of the whole profiles list
    profile = compare_profiles(profiles)

    return profile, 1.0 - profile['not_shared'], sample_stdev, ci
Ejemplo n.º 6
0
def bootstrap_profiles(profiles, alpha=0.05, repetitions=1000,
                       randfunc=randint):
    """Performs bootstrapping over the sample 'profiles'

    Inputs:
        profiles: list of profiles
        alpha: defines the confidence interval as 1 - alpha
        repetitions: number of bootstrap iterations
        randfunc: random function used to draw the bootstrap samples; it is
            called as randfunc(0, len(profiles)) and its result is used as
            an index into profiles

    Returns:
        profile: the result of compare_profiles over the whole profiles list
        shared: the amount shared in that profile, i.e.
            1.0 - profile['not_shared'] (this is computed from the full
            sample, it is not the mean of the bootstrap replicates)
        sample_stdev: the bootstrap standard deviation of the amount shared
        ci: the bootstrap percentile interval of the amount shared, taken at
            the alpha / 2 and 1 - alpha / 2 quantiles
    """
    length = len(profiles)
    # NOTE(review): the return value is discarded, so this call only matters
    # if normalize_profiles modifies 'profiles' in place - confirm
    normalize_profiles(profiles)
    boot_shared = []
    for _ in range(repetitions):
        # Construct the bootstrap sample (drawn with replacement)
        # NOTE(review): indexing is only safe if randfunc excludes the upper
        # bound, as numpy.random.randint does - confirm which randint is used
        resample = [profiles[randfunc(0, length)] for _ in range(length)]
        # Store only the amount shared; the resampled profile itself is not
        # needed afterwards, so it is not kept in memory
        boot_shared.append(1.0 - compare_profiles(resample)['not_shared'])
    # Convert data to a numpy array
    boot_shared = array(boot_shared)
    # Get the standard deviation of the shared data
    sample_stdev = std(boot_shared)
    # Compute the confidence interval for the bootstrapped data
    # using bootstrap percentile interval
    ci = quantile(boot_shared, [alpha / 2, 1 - (alpha / 2)])
    # Compute the profile of the whole profiles list
    profile = compare_profiles(profiles)

    return profile, 1.0 - profile['not_shared'], sample_stdev, ci
Ejemplo n.º 7
0
 def test_normalize_profiles_neq_len(self):
     '''Normalizing two profiles with many different taxa'''
     # Every taxon seen in either profile is expected in both results,
     # with 0.00 where the profile did not have it
     expected = [{
         'taxa1': 0.20,
         'taxa2': 0.00,
         'taxa3': 0.30,
         'taxa4': 0.00,
         'taxa5': 0.15,
         'taxa6': 0.35,
         'taxa7': 0.00,
         'not_shared': 0.00
     }, {
         'taxa1': 0.10,
         'taxa2': 0.50,
         'taxa3': 0.00,
         'taxa4': 0.15,
         'taxa5': 0.05,
         'taxa6': 0.00,
         'taxa7': 0.20,
         'not_shared': 0.00
     }]
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12
     self.assertEqual(normalize_profiles(self.uneven_profiles), expected)
Ejemplo n.º 8
0
 def test_normalize_profiles_eq_len(self):
     '''Normalizing profiles of equal length'''
     # Three profiles over the same four taxa are expected back
     expected = [{
         'taxa1': 0.20,
         'taxa2': 0.30,
         'taxa3': 0.15,
         'taxa4': 0.35,
         'not_shared': 0.00
     }, {
         'taxa1': 0.10,
         'taxa2': 0.50,
         'taxa3': 0.15,
         'taxa4': 0.25,
         'not_shared': 0.00
     }, {
         'taxa1': 0.15,
         'taxa2': 0.22,
         'taxa3': 0.15,
         'taxa4': 0.48,
         'not_shared': 0.00
     }]
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12
     self.assertEqual(normalize_profiles(self.many_profiles), expected)