# Accumulators for the different-subject (between-host) comparisons.  Only the
# two SNP lists are filled in this block; the gene lists and the two index
# maps are just initialised here.
diff_subject_snp_plowers = []
diff_subject_snp_puppers = []
diff_subject_gene_plowers = []
diff_subject_gene_puppers = []
between_host_gene_idx_map = {}
low_divergence_between_host_gene_idx_map = {}

# diff_subject_idxs holds two parallel index sequences: entry k of each gives
# the (row, column) of one sample pair in the SNP matrices.
for snp_i, snp_j in zip(diff_subject_idxs[0], diff_subject_idxs[1]):

    # Lower / upper bound on the SNP difference rate for this pair, from the
    # observed difference count and the number of opportunities.
    # NOTE(review): the helper is called without its optional third argument
    # here, while other calls in this file pass `alpha` -- confirm that the
    # default level is what is wanted for between-host pairs.
    plower, pupper = stats_utils.calculate_poisson_rate_interval(
        snp_difference_matrix[snp_i, snp_j],
        snp_opportunity_matrix[snp_i, snp_j])

    diff_subject_snp_plowers.append(plower)
    diff_subject_snp_puppers.append(pupper)

# Clip the lower bounds into [1e-07, 1e09]; this also converts the list to a
# numpy array.
diff_subject_snp_plowers = numpy.clip(diff_subject_snp_plowers, 1e-07, 1e09)
diff_subject_snp_puppers = numpy.array(diff_subject_snp_puppers)

# Sort the two SNP arrays together by ascending lower bound, breaking ties by
# upper bound.  numpy.lexsort treats the LAST key as the primary one, hence
# the (puppers, plowers) order.  Unlike unpacking zip(*sorted(zip(...))), this
# also works when there are no sample pairs at all.
sort_order = numpy.lexsort(
    (diff_subject_snp_puppers, diff_subject_snp_plowers))
diff_subject_snp_plowers = diff_subject_snp_plowers[sort_order]
diff_subject_snp_puppers = diff_subject_snp_puppers[sort_order]

##############################################################################
#
# Now plot figures
#print typical_same_subject_snp_opportunities, typical_diff_subject_snp_opportunities
#print typical_same_subject_gene_opportunities, typical_diff_subject_gene_opportunities

# Short aliases for the typical_diff_subject_* opportunity values.
Lsnps = typical_diff_subject_snp_opportunities
Lgenes = typical_diff_subject_gene_opportunities

# Interval bounds for every same-subject sample pair.  The four lists are kept
# parallel: each loop iteration appends exactly one entry to each of them.
same_subject_snp_plowers = []
same_subject_snp_puppers = []
same_subject_gene_plowers = []
same_subject_gene_puppers = []

# same_subject_snp_idxs holds two parallel index sequences; entry k of each
# gives the (row, column) of one sample pair in the SNP matrices.
same_subject_snp_pairs = zip(same_subject_snp_idxs[0],
                             same_subject_snp_idxs[1])
for sample_pair_idx, (i, j) in enumerate(same_subject_snp_pairs):

    # Bounds on the SNP difference rate for this pair.
    plower, pupper = stats_utils.calculate_poisson_rate_interval(
        snp_difference_matrix[i, j], snp_opportunity_matrix[i, j], alpha)

    same_subject_snp_plowers.append(plower)
    same_subject_snp_puppers.append(pupper)

    # NOTE(review): this result is not read anywhere in the visible code;
    # drop the call if nothing further down uses `snp_differences`.
    snp_differences = diversity_utils.calculate_snp_differences_between(
        i, j, allele_counts_map, passed_sites_map, min_change=min_change)

    # The gene matrices use their own index arrays, so i and j are re-bound
    # to the gene-matrix coordinates of the same pair from here on.
    i = same_subject_gene_idxs[0][sample_pair_idx]
    j = same_subject_gene_idxs[1][sample_pair_idx]

    # Bounds on the gene difference rate for this pair.
    # NOTE(review): called without `alpha`, unlike the SNP call above --
    # confirm the helper's default level is intended here.
    plower, pupper = stats_utils.calculate_poisson_rate_interval(
        gene_difference_matrix[i, j], gene_opportunity_matrix[i, j])

    same_subject_gene_plowers.append(plower)
    # Record the upper bound too, so the gene lists stay the same length.
    same_subject_gene_puppers.append(pupper)
        # Per-sample depth summaries, appended in the same order as the rate
        # lists and sample_names below.
        median_depths, depth_lowers, depth_uppers = [], [], []

        # Now do polymorphism axis
        for sample in highcoverage_samples:
            # Site counts derived from this sample's SFS map.
            (within_sites, between_sites, total_sites) = (
                sfs_utils.calculate_polymorphism_rates_from_sfs_map(
                    sfs_map[sample]))

            # Turn the counts into fractions of total sites; "* 1.0" keeps
            # the division floating-point even if the counts are integers.
            within_rate = within_sites * 1.0 / total_sites
            between_rate = between_sites * 1.0 / total_sites
            within_rates.append(within_rate)
            between_rates.append(between_rate)

            # Calculate 95% confidence intervals
            (within_rate_lower, within_rate_upper) = (
                stats_utils.calculate_poisson_rate_interval(
                    within_sites, total_sites, alpha=0.05))
            within_rate_uppers.append(within_rate_upper)
            within_rate_lowers.append(within_rate_lower)

            # Summarise the depth distribution by its IQR bounds and median.
            (depths, counts) = (
                sfs_utils.calculate_depth_distribution_from_sfs_map(
                    sfs_map[sample]))
            (dlower, dupper) = stats_utils.calculate_IQR_from_distribution(
                depths, counts)
            dmedian = stats_utils.calculate_median_from_distribution(
                depths, counts)

            sample_names.append(sample)
            median_depths.append(dmedian)
            depth_lowers.append(dlower)
            depth_uppers.append(dupper)
# Example no. 4
# 0
# For every same-subject sample pair, record interval bounds and a point
# estimate for (a) the pi of the higher-pi sample and (b) the SNP difference
# rate between the two samples.
for sample_pair_idx in xrange(len(same_subject_snp_idxs[0])):

    # Coordinates of this pair in the SNP matrices.
    i, j = (same_subject_snp_idxs[0][sample_pair_idx],
            same_subject_snp_idxs[1][sample_pair_idx])

    # Indices of the same two samples in the per-sample pi arrays.
    i2, j2 = (desired_same_subject_idxs[0][sample_pair_idx],
              desired_same_subject_idxs[1][sample_pair_idx])

    # Use whichever sample has the strictly larger pi; ties fall to j2.
    pi_idx = i2 if pis[i2] > pis[j2] else j2

    # Calculate max pi between two samples
    (plower, pupper) = stats_utils.calculate_poisson_rate_interval(
        total_pis[pi_idx], total_pi_opportunities[pi_idx], alpha)
    # Point estimate: count over opportunities ("* 1.0" keeps the division
    # floating-point even if both operands are integers).
    pmid = total_pis[pi_idx] * 1.0 / total_pi_opportunities[pi_idx]

    same_subject_pi_pmids.append(pmid)
    same_subject_pi_plowers.append(plower)
    same_subject_pi_puppers.append(pupper)

    # Calculate SNP changes
    (plower, pupper) = stats_utils.calculate_poisson_rate_interval(
        snp_difference_matrix[i, j], snp_opportunity_matrix[i, j], alpha)
    pmid = snp_difference_matrix[i, j] * 1.0 / snp_opportunity_matrix[i, j]

    same_subject_snp_pmids.append(pmid)
    same_subject_snp_plowers.append(plower)
    same_subject_snp_puppers.append(pupper)