Beispiel #1
0
    if not ((snp_samples[snp_i] in haploid_samples) and (snp_samples[snp_j] in haploid_samples)):
        # both have to be haploids
        continue
        
    
    plower = snp_difference_matrix[snp_i,snp_j]
    pupper = plower*1.1
 
    #plower,pupper = stats_utils.calculate_poisson_rate_interval(snp_difference_matrix[snp_i,snp_j], snp_opportunity_matrix[snp_i, snp_j])
    
    diff_subject_snp_plowers.append(plower)
    diff_subject_snp_puppers.append(pupper)
        
    i = diff_subject_gene_idxs[0][sample_pair_idx]
    j = diff_subject_gene_idxs[1][sample_pair_idx]
    gene_differences = gene_diversity_utils.calculate_gene_differences_between(i, j, gene_depth_matrix, marker_coverages,include_high_copynum=False)

    if snp_substitution_rate[snp_i,snp_j] < clade_divergence_threshold:
    
        for gene_idx, depth_tuple_1, depth_tuple_2 in gene_differences:
            if gene_idx not in between_host_gene_idx_map:
                between_host_gene_idx_map[gene_idx]=0
            
            
            between_host_gene_idxs.append(gene_idx)
            
            between_host_gene_idx_map[gene_idx]+=1
    
    if snp_substitution_rate[snp_i,snp_j] < modification_divergence_threshold:
        # A modification, not a replacement!
        for gene_idx, depth_tuple_1, depth_tuple_2 in gene_differences:
Beispiel #2
0
               label='CN>0.01')
# Finish and save the reference-gene-presence SFS figure.
#
# Draw a black dotted ('k:') vertical guide at x=10, spanning y=1 to y=1e4
# on the log-scaled y axis.
pylab.semilogy([10, 10], [1, 1e04], 'k:')
# x axis runs from -1 to one unit past the last entry of `prevalences`.
pylab.xlim([-1, prevalences[-1] + 1])
pylab.legend(loc='lower right', frameon=False, fontsize=8)
# Write the figure into parse_midas_data.analysis_directory twice, with the
# species name as filename prefix: once as PDF, once as a 300-dpi PNG.
pylab.savefig('%s/%s_reference_gene_presence_sfs.pdf' %
              (parse_midas_data.analysis_directory, species_name),
              bbox_inches='tight')
pylab.savefig('%s/%s_reference_gene_presence_sfs.png' %
              (parse_midas_data.analysis_directory, species_name),
              bbox_inches='tight',
              dpi=300)

# NOTE(review): the script terminates here with exit status 0, so no
# top-level statement after this line is ever executed. Presumably a
# deliberate early stop while the analysis that follows is disabled --
# confirm before relying on anything below.
sys.exit(0)
# NOTE(review): this list is initialised here but nothing in the loop below
# appends to it.
within_subject_gene_change_prevalences = []
# Calculate gene content differences for every (i, j) pair taken in lockstep
# from the two index sequences in same_subject_idxs.
for i, j in zip(same_subject_idxs[0], same_subject_idxs[1]):

    # First pass: differences computed with gene_names and
    # min_log2_fold_change=4. This result only feeds the printout just below;
    # gene_differences is overwritten by the second call further down.
    gene_differences = gene_diversity_utils.calculate_gene_differences_between(
        i,
        j,
        gene_names,
        gene_depth_matrix,
        marker_coverages,
        min_log2_fold_change=4)

    # Debug output: each difference record, followed by the
    # gene_prevalence_map entry keyed by the record's first element.
    if len(gene_differences) > 0:
        print "Differences between", i, j
        for idx in xrange(0, len(gene_differences)):
            print gene_differences[idx], gene_prevalence_map[
                gene_differences[idx][0]]
    # NOTE(review): sample_pair_idx is not bound by this loop. It must come
    # from code outside this view, in which case every iteration reads the
    # same pair; if it is not defined at all this raises NameError. It looks
    # like it was meant to be the loop counter -- confirm.
    snp_i = same_subject_snp_idxs[0][sample_pair_idx]
    snp_j = same_subject_snp_idxs[1][sample_pair_idx]
    # Interval for the SNP data of this pair, computed from its difference
    # and opportunity matrix entries with the explicit `alpha`.
    # (plower/pupper are presumably the lower/upper bounds -- verify against
    # stats_utils.)
    plower, pupper = stats_utils.calculate_poisson_rate_interval(
        snp_difference_matrix[snp_i, snp_j],
        snp_opportunity_matrix[snp_i, snp_j], alpha)
    #
    same_subject_snp_plowers.append(plower)
    same_subject_snp_puppers.append(pupper)
    #
    #snp_differences = diversity_utils.calculate_snp_differences_between(i,j,allele_counts_map, passed_sites_map, min_change=min_change)
    #
    # The loop variables i and j are rebound here to the gene-matrix indices
    # of the pair; the values unpacked from zip() are discarded from this
    # point on.
    i = same_subject_gene_idxs[0][sample_pair_idx]
    j = same_subject_gene_idxs[1][sample_pair_idx]
    # NOTE(review): unlike the call at the top of the loop, this call passes
    # neither gene_names nor min_log2_fold_change, so gene_depth_matrix lands
    # in the third positional slot. One of the two call forms is likely
    # stale -- confirm against gene_diversity_utils.
    gene_differences = gene_diversity_utils.calculate_gene_differences_between(
        i, j, gene_depth_matrix, marker_coverages)

    # Tally, per gene index, how many pairs whose SNP substitution rate is
    # below low_divergence_threshold show a difference in that gene.
    if snp_substitution_rate[snp_i, snp_j] < low_divergence_threshold:
        for gene_idx, depth_tuple_1, depth_tuple_2 in gene_differences:
            if gene_idx not in within_host_gene_idx_map:
                within_host_gene_idx_map[gene_idx] = 0
#
            within_host_gene_idx_map[gene_idx] += 1
#
    # Same interval computation for the gene data; no alpha is passed here,
    # so whatever default the helper defines applies. plower/pupper are
    # reused and overwrite the SNP values already appended above.
    plower, pupper = stats_utils.calculate_poisson_rate_interval(
        gene_difference_matrix[i, j], gene_opportunity_matrix[i, j])
    #
    same_subject_gene_plowers.append(plower)
    same_subject_gene_puppers.append(pupper)
#