Beispiel #1
0
replacement_map = {}

for species_name in good_species_list:

    # This analysis is restricted to Bacteroides fragilis; skip everything else.
    if not species_name.startswith('Bacteroides_fragilis'):
        continue

    # "Haploid" samples above the depth threshold, as selected by
    # diversity_utils.calculate_haploid_samples.
    haploid_samples = diversity_utils.calculate_haploid_samples(
        species_name, debug=debug)

    # Too few usable samples for this species: move on to the next one.
    if len(haploid_samples) < min_sample_size:
        continue

    # Index pairs into haploid_samples, grouped as same sample / same subject /
    # different subjects. Each group is indexed as [0] and [1] below, i.e. as a
    # pair of parallel index sequences.
    (same_sample_idxs,
     same_subject_idxs,
     diff_subject_idxs) = parse_midas_data.calculate_ordered_subject_pairs(
         sample_order_map, haploid_samples)

    # One same-subject pair per position in the first index sequence.
    sample_size = len(same_subject_idxs[0])

    # Gather every sample that takes part in at least one same-subject pair.
    snp_samples = set()
    for sample_pair_idx in xrange(sample_size):
        i = same_subject_idxs[0][sample_pair_idx]
        j = same_subject_idxs[1][sample_pair_idx]
        snp_samples.update((haploid_samples[i], haploid_samples[j]))

    # Downstream code expects an indexable sequence rather than a set.
    snp_samples = list(snp_samples)
    include_high_copynum=include_high_copynum)

# Put the gene-based and SNP-based sample lists onto one common reference
# ordering (desired_samples), then translate the same-sample and same-subject
# index pairs from that reference ordering into each list's own indexing.

# Combined gene differences: sum of the gain and loss matrices
# (presumably element-wise numpy arrays -- TODO confirm).
gene_difference_matrix = gene_gain_matrix + gene_loss_matrix

# Now need to make the gene samples and snp samples match up
# The gene sample list itself is used as the reference ordering.
desired_samples = gene_samples

# Number of samples in the reference list.
# NOTE(review): the name suggests these are all haploid samples; that is not
# established by anything visible in this span -- confirm where gene_samples is built.
num_haploids = len(desired_samples)

# Calculate which pairs of idxs belong to the same sample, which to the same subject
# and which to different subjects
# (Earlier variant, kept for reference: it called calculate_subject_pairs with
# subject_sample_map instead of the ordered version used below.)
#desired_same_sample_idxs, desired_same_subject_idxs, desired_diff_subject_idxs = parse_midas_data.calculate_subject_pairs(subject_sample_map, desired_samples)

# Calculate which pairs of idxs belong to the same sample, which to the same subject
# and which to different subjects
# desired_diff_subject_idxs is unpacked here but not used in the lines that follow.
desired_same_sample_idxs, desired_same_subject_idxs, desired_diff_subject_idxs = parse_midas_data.calculate_ordered_subject_pairs(
    sample_order_map, desired_samples)

# Lookup maps from the reference ordering into the SNP and gene sample lists.
# Because desired_samples is gene_samples, the gene map relates that list to
# itself (presumably an identity mapping -- verify against the helper).
snp_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    desired_samples, snp_samples)
gene_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    desired_samples, gene_samples)

# Same-sample index pairs, re-expressed in SNP-list and gene-list indexing.
same_sample_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, desired_same_sample_idxs)
same_sample_gene_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    gene_sample_idx_map, desired_same_sample_idxs)

# Same-subject index pairs, re-expressed in SNP-list and gene-list indexing.
same_subject_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, desired_same_subject_idxs)
same_subject_gene_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    gene_sample_idx_map, desired_same_subject_idxs)
                                                                debug=debug)

    if len(haploid_samples) < min_sample_size:
        continue

    # all samples
    temporal_samples = diversity_utils.calculate_temporal_samples(species_name)

    # temporal changes
    sys.stderr.write("Loading pre-computed temporal changes for %s...\n" %
                     species_name)
    temporal_change_map = calculate_temporal_changes.load_temporal_change_map(
        species_name)
    sys.stderr.write("Done!\n")

    same_sample_idxs, same_subject_idxs, diff_subject_idxs = parse_midas_data.calculate_ordered_subject_pairs(
        sample_order_map, temporal_samples)

    snp_samples = set()
    sample_size = 0
    for sample_pair_idx in xrange(0, len(same_subject_idxs[0])):

        i = same_subject_idxs[0][sample_pair_idx]
        j = same_subject_idxs[1][sample_pair_idx]

        sample_i = temporal_samples[i]
        sample_j = temporal_samples[j]

        # Don't look at haploids here!
        if sample_i in haploid_samples:
            continue