# Median coverage of every sample, in the same order as `samples`
median_coverages = numpy.array(
    [sample_coverage_map[samples[i]] for i in xrange(len(samples))])

# ---- Synonymous (4D) sites, restricted to metaphlan2_genes ----
sys.stderr.write("Calculate synonymous pi matrix...\n")
pi_matrix_syn, avg_pi_matrix_syn = diversity_utils.calculate_pi_matrix(
    allele_counts_map, passed_sites_map,
    variant_type='4D', allowed_genes=metaphlan2_genes)

# Fixation matrix over the same class of sites
fixation_matrix_syn = diversity_utils.calculate_fixation_matrix(
    allele_counts_map, passed_sites_map,
    variant_type='4D', min_change=min_change,
    allowed_genes=metaphlan2_genes)

sys.stderr.write("Done!\n")

# ---- Nonsynonymous (1D) sites, restricted to metaphlan2_genes ----
sys.stderr.write("Calculate nonsynonymous pi matrix...\n")
# Pairwise-difference matrices for the 1D sites
pi_matrix_non, avg_pi_matrix_non = diversity_utils.calculate_pi_matrix(
    allele_counts_map, passed_sites_map,
    variant_type='1D', allowed_genes=metaphlan2_genes)
# Calculate fixation matrix
fixation_matrix_non = diversity_utils.calculate_fixation_matrix(
예제 #2
0
# Running totals, initialised empty / zero before the chunked pass below.
nonsynonymous_count_sfs = []

synonymous_pi_weighted_counts = 0
nonsynonymous_pi_weighted_counts = 0

    
# Read the SNP data chunk by chunk: the final_line_number returned by
# parse_snps is passed back in as initial_line_number on the next iteration,
# and the loop stops once that value is negative.
final_line_number = 0
while final_line_number >= 0:
    
    sys.stderr.write("Loading chunk starting @ %d...\n" % final_line_number)
    snp_samples, allele_counts_map, passed_sites_map, final_line_number = parse_midas_data.parse_snps(species_name, debug=debug, allowed_variant_types=allowed_variant_types, allowed_samples=largest_clade_samples,allowed_genes=core_genes, chunk_size=chunk_size,initial_line_number=final_line_number)
    sys.stderr.write("Done! Loaded %d genes\n" % len(allele_counts_map.keys()))
    
    # Calculate fixation matrix
    sys.stderr.write("Calculating matrix of snp differences...\n")
    chunk_snp_difference_matrix, chunk_snp_opportunity_matrix = diversity_utils.calculate_fixation_matrix(allele_counts_map, passed_sites_map, allowed_genes=core_genes, min_change=min_change)    
    sys.stderr.write("Done!\n")
    
    # An accumulator with zero rows means nothing has been allocated yet
    # (snp_difference_matrix is created before this excerpt). Size every
    # accumulator from this chunk's matrix.
    if snp_difference_matrix.shape[0]==0:
        # Multiplying by 1.0 gives a floating-point array even when the chunk
        # matrix has an integer dtype.
        snp_difference_matrix = numpy.zeros_like(chunk_snp_difference_matrix)*1.0
        snp_opportunity_matrix = numpy.zeros_like(snp_difference_matrix)*1.0
        # These copy the shape and dtype of the (already float) accumulator.
        synonymous_difference_matrix = numpy.zeros_like(snp_difference_matrix)
        synonymous_opportunity_matrix = numpy.zeros_like(snp_difference_matrix)
        nonsynonymous_difference_matrix = numpy.zeros_like(snp_difference_matrix)
        nonsynonymous_opportunity_matrix = numpy.zeros_like(snp_difference_matrix)

        n = len(snp_samples)
        
        # Start from 1/n, 2/n, ..., 1, shift every value down by half a step
        # (giving (k - 0.5)/n), then force the first value to -0.1.
        # NOTE(review): maf_bins[1] needs at least two entries, so this raises
        # IndexError when n < 2 -- confirm callers always pass >= 2 samples.
        maf_bins = numpy.arange(1,n+1)*1.0/n
        maf_bins -= (maf_bins[1]-maf_bins[0])/2
        maf_bins[0]=-0.1
예제 #3
0
    else:
        sys.stderr.write("Analyzing %d haploid samples...\n" % len(desired_samples))

    species_idx += 1

    # Pull the SNP tables for this species, limited to the desired samples
    sys.stderr.write("Loading %s...\n" % species_name)
    dummy_samples, allele_counts_map, passed_sites_map = (
        parse_midas_data.parse_snps(
            species_name,
            debug=debug,
            allowed_samples=desired_samples))
    sys.stderr.write("Done!\n")

    # Fixation counts and opportunities: 4D sites, 1D sites, then every site
    sys.stderr.write("Calculating 4D fixation matrix...\n")
    fixation_matrix_syn, fixation_opportunities_syn = (
        diversity_utils.calculate_fixation_matrix(
            allele_counts_map,
            passed_sites_map,
            allowed_variant_types=set(['4D']),
            min_change=min_change))
    sys.stderr.write("Calculating 1D fixation matrix...\n")
    fixation_matrix_non, fixation_opportunities_non = (
        diversity_utils.calculate_fixation_matrix(
            allele_counts_map,
            passed_sites_map,
            allowed_variant_types=set(['1D']),
            min_change=min_change))
    sys.stderr.write("Calculating total fixation matrix...\n")
    fixation_matrix_all, fixation_opportunities_all = (
        diversity_utils.calculate_fixation_matrix(
            allele_counts_map,
            passed_sites_map,
            min_change=min_change))

    sys.stderr.write("Done!\n")

    # Fixations per opportunity at 1D (dN) and 4D (dS) sites
    dN = fixation_matrix_non / fixation_opportunities_non
    dS = fixation_matrix_syn / fixation_opportunities_syn
    dNplusdS = dN + dS
    # The (dNplusdS == 0) mask adds 1 wherever the sum is zero, so the
    # denominator is never zero at those entries.
    fraction_nonsynonymous = dN / (dNplusdS + (dNplusdS == 0))

    # Fixations per opportunity over all variant types
    dtot = fixation_matrix_all / fixation_opportunities_all
예제 #4
0
# Walk through the SNP data one chunk at a time. Each call to parse_snps
# returns the final_line_number that is fed back in as initial_line_number;
# a negative value ends the loop.
final_line_number = 0
while final_line_number >= 0:

    sys.stderr.write("Loading chunk starting @ %d...\n" % final_line_number)
    (dummy_samples, allele_counts_map, passed_sites_map,
     final_line_number) = parse_midas_data.parse_snps(
        species_name, debug=debug, allowed_samples=snp_samples,
        chunk_size=chunk_size, initial_line_number=final_line_number)
    sys.stderr.write("Done! Loaded %d genes\n" % len(allele_counts_map.keys()))

    # Differences and opportunities contributed by this chunk
    sys.stderr.write("Calculating matrix of snp differences...\n")
    chunk_snp_difference_matrix, chunk_snp_opportunity_matrix = (
        diversity_utils.calculate_fixation_matrix(
            allele_counts_map, passed_sites_map, min_change=min_change))
    sys.stderr.write("Done!\n")

    # Accumulators with zero rows have not been sized yet: allocate them
    # from the chunk matrix (the * 1.0 yields a floating-point array).
    if not snp_difference_matrix.shape[0]:
        snp_difference_matrix = (
            numpy.zeros_like(chunk_snp_difference_matrix) * 1.0)
        snp_opportunity_matrix = (
            numpy.zeros_like(snp_difference_matrix) * 1.0)

    # Fold this chunk into the running totals (in place)
    snp_difference_matrix += chunk_snp_difference_matrix
    snp_opportunity_matrix += chunk_snp_opportunity_matrix

# Differences per opportunity for every pair of samples
substitution_rate = snp_difference_matrix * 1.0 / snp_opportunity_matrix

cluster_idxss = diversity_utils.cluster_samples(
    substitution_rate, min_d=0, max_d=1e09)
예제 #5
0
    # Load the next chunk of SNP data, restricted to snp_samples and
    # core_genes. The returned final_line_number overwrites the value that
    # was passed in as initial_line_number.
    dummy_samples, allele_counts_map, passed_sites_map, final_line_number = parse_midas_data.parse_snps(
        species_name,
        debug=debug,
        allowed_samples=snp_samples,
        allowed_genes=core_genes,
        chunk_size=chunk_size,
        initial_line_number=final_line_number)
    sys.stderr.write("Done! Loaded %d genes\n" % len(allele_counts_map.keys()))

    # NOTE(review): Python 2 print statement that writes to stdout, unlike the
    # stderr progress messages around it -- looks like leftover debugging
    # output; confirm before relying on stdout being clean.
    print len(dummy_samples), "dummy samples!"

    # Calculate fixation matrix
    sys.stderr.write("Calculating matrix of snp differences...\n")
    chunk_snp_difference_matrix, chunk_snp_opportunity_matrix = diversity_utils.calculate_fixation_matrix(
        allele_counts_map,
        passed_sites_map,
        min_change=min_change,
        allowed_genes=core_genes,
        allowed_variant_types=allowed_variant_types)  #
    sys.stderr.write("Done!\n")

    # Accumulators with zero rows have not been sized yet: allocate them from
    # this chunk's matrix. Multiplying by 1.0 gives a floating-point array
    # even when the chunk matrix has an integer dtype.
    if snp_difference_matrix.shape[0] == 0:
        snp_difference_matrix = numpy.zeros_like(
            chunk_snp_difference_matrix) * 1.0
        snp_opportunity_matrix = numpy.zeros_like(snp_difference_matrix) * 1.0

    # Add this chunk's contribution to the running totals (in place).
    snp_difference_matrix += chunk_snp_difference_matrix
    snp_opportunity_matrix += chunk_snp_opportunity_matrix

    sys.stderr.write("Calculating singletons...\n")
    # Singleton calculation for this chunk, limited to core_genes.
    chunk_singletons = diversity_utils.calculate_singletons(
        allele_counts_map, passed_sites_map, allowed_genes=core_genes)
예제 #6
0
# Remember each sample's median coverage (the two sequences are index-aligned)
sample_coverage_map = {
    sample: median_coverages[idx] for idx, sample in enumerate(samples)}

# Load SNP information for species_name; this rebinds `samples`
sys.stderr.write("Loading %s...\n" % species_name)
samples, allele_counts_map, passed_sites_map, last_line = (
    parse_midas_data.parse_snps(species_name, debug))
sys.stderr.write("Done!\n")

# Rebuild the coverage vector in the order of the freshly returned samples
median_coverages = numpy.array(
    [sample_coverage_map[samples[i]] for i in xrange(len(samples))])

# ---- Synonymous (4D) sites ----
sys.stderr.write("Calculate synonymous pi matrix...\n")
pi_matrix_syn, avg_pi_matrix_syn = diversity_utils.calculate_pi_matrix(
    allele_counts_map, passed_sites_map, variant_type='4D')
# Diagonal of the synonymous pi matrix
pis = numpy.diag(pi_matrix_syn)

# Fixation matrix for the same class of sites
fixation_matrix_syn, persite_fixation_matrix_syn = (
    diversity_utils.calculate_fixation_matrix(
        allele_counts_map, passed_sites_map,
        variant_type='4D', min_change=min_change))

sys.stderr.write("Done!\n")

# ---- Nonsynonymous (1D) sites ----
sys.stderr.write("Calculate nonsynonymous pi matrix...\n")
pi_matrix_non, avg_pi_matrix_non = diversity_utils.calculate_pi_matrix(
    allele_counts_map, passed_sites_map, variant_type='1D')
fixation_matrix_non, persite_fixation_matrix_non = (
    diversity_utils.calculate_fixation_matrix(
        allele_counts_map, passed_sites_map,
        variant_type='1D', min_change=min_change))
sys.stderr.write("Done!\n")

# Keep only samples whose median coverage reaches min_coverage
high_coverage_samples = samples[median_coverages >= min_coverage]

# Same coverage filter, additionally requiring a diagonal pi of at most 1e-03
high_coverage_low_pi_samples = samples[
    (median_coverages >= min_coverage) & (pis <= 1e-03)]