Esempio n. 1
0
 #all_species_gene_changes_category={}
 # Per-species setup. This fragment is the interior of a loop whose header
 # is outside this excerpt (see the `continue` below).
 # Rebind the null container to a fresh dict and create an empty all_data
 # entry for the current species.
 all_species_null = {}
 all_data[species_name] = {}
 #
 ####################
 # Analyze the data #
 ####################
 #
 # Only plot samples above a certain depth threshold that are "haploids"
 haploid_samples = diversity_utils.calculate_haploid_samples(species_name,
                                                             debug=debug)
 #
 # Skip this species when it has fewer than min_sample_size haploid samples.
 # NOTE(review): the empty all_data[species_name] entry created above stays
 # in place for skipped species -- confirm that is intended.
 if len(haploid_samples) < min_sample_size:
     continue
 #
 # Compute three groups of index pairs over haploid_samples. Within this
 # excerpt only same_subject_idxs is consumed afterwards, where its entries
 # are used as positions into haploid_samples.
 same_sample_idxs, same_subject_idxs, diff_subject_idxs = parse_midas_data.calculate_ordered_subject_pairs(
     sample_order_map, haploid_samples)
 #
 # Collect the distinct haploid samples that take part in at least one
 # same-subject pair, and count those pairs along the way.
 snp_samples = set()
 sample_size = 0
 for sample_pair_idx in xrange(0, len(same_subject_idxs[0])):
     #
     # same_subject_idxs[0] and same_subject_idxs[1] are read in parallel:
     # entry k of each gives the two haploid_samples positions of pair k.
     i = same_subject_idxs[0][sample_pair_idx]
     j = same_subject_idxs[1][sample_pair_idx]
     #
     # The set de-duplicates samples that appear in more than one pair.
     snp_samples.add(haploid_samples[i])
     snp_samples.add(haploid_samples[j])
     #
     # Incremented once per pair, so sample_size counts pairs, not samples.
     sample_size += 1
     #
 # Convert the de-duplicated samples to a list (element order follows set
 # iteration order) and build a separate set from the same elements.
 snp_samples = list(snp_samples)
 allowed_sample_set = set(snp_samples)
# Load the sample -> country and sample -> order maps from parse_HMP_data.
sample_country_map = parse_HMP_data.parse_sample_country_map()
sample_order_map = parse_HMP_data.parse_sample_order_map()
# Closes a progress message whose opening write is outside this excerpt.
sys.stderr.write("Done!\n")

# Samples for this species as selected by calculate_temporal_samples, using
# the configured minimum median coverage.
temporal_samples = diversity_utils.calculate_temporal_samples(
    species_name, min_coverage=config.min_median_coverage)
# Haploid samples for this species, stored as a set.
haploid_samples = set(
    diversity_utils.calculate_haploid_samples(species_name, debug=debug))

# NOTE(review): mid-script import; sfs_utils is not referenced anywhere in
# the visible excerpt -- confirm it is used further down.
import sfs_utils
sys.stderr.write("Loading SFSs for %s...\t" % species_name)
# Load the within-sample SFS data, restricted to variant types 1D-4D.
# NOTE(review): presumably these are site degeneracy classes -- confirm
# against parse_midas_data.
samples, sfs_map = parse_midas_data.parse_within_sample_sfs(
    species_name, allowed_variant_types=set(['1D', '2D', '3D', '4D']))
sys.stderr.write("Done!\n")

# Index pairs over temporal_samples (not haploid_samples); the loop that
# follows uses same_subject_idxs entries as positions into temporal_samples.
same_sample_idxs, same_subject_idxs, diff_subject_idxs = parse_midas_data.calculate_ordered_subject_pairs(
    sample_order_map, temporal_samples)

# 21 evenly spaced values from 0 to 1 inclusive (spacing 0.05).
frequency_bins = numpy.linspace(0, 1, 21)

#fs = numpy.array([0.1,0.2,0.3,0.4,0.5,0.5,0.6,0.7,0.8,0.9])

# NOTE(review): five fixed values whose role is not visible in this excerpt;
# presumably frequency-difference thresholds -- confirm where dfs is used.
dfs = numpy.array([0.6, 0.7, 0.8, 0.9, 0.98])

# Starts empty; nothing in the visible excerpt appends to it.
perrs = []

for sample_pair_idx in xrange(0, len(same_subject_idxs[0])):

    i = same_subject_idxs[0][sample_pair_idx]
    j = same_subject_idxs[1][sample_pair_idx]

    sample_i = temporal_samples[i]