final_line_number = 0 while final_line_number >= 0: sys.stderr.write("Loading chunk starting @ %d...\n" % final_line_number) dummy_samples, allele_counts_map, passed_sites_map, final_line_number = parse_midas_data.parse_snps( species_name, debug=debug, allowed_samples=snp_samples, chunk_size=chunk_size, initial_line_number=final_line_number) sys.stderr.write("Done! Loaded %d genes\n" % len(allele_counts_map.keys())) # Calculate fixation matrix sys.stderr.write("Calculating matrix of snp differences...\n") chunk_snp_difference_matrix, chunk_snp_opportunity_matrix = diversity_utils.calculate_fixation_matrix( allele_counts_map, passed_sites_map, min_change=min_change) sys.stderr.write("Done!\n") if snp_difference_matrix.shape[0] == 0: snp_difference_matrix = numpy.zeros_like( chunk_snp_difference_matrix) * 1.0 snp_opportunity_matrix = numpy.zeros_like(snp_difference_matrix) * 1.0 snp_difference_matrix += chunk_snp_difference_matrix snp_opportunity_matrix += chunk_snp_opportunity_matrix sys.stderr.write("Calculating genotype matrix for chunk...\n") for gene_name in allele_counts_map.keys(): for variant_type in allele_counts_map[gene_name].keys(): if len(allele_counts_map[gene_name][variant_type]['alleles']) == 0:
median_coverages = numpy.array( [sample_coverage_map[samples[i]] for i in xrange(0, len(samples))]) # Calculate full matrix of synonymous pairwise differences sys.stderr.write("Calculate synonymous pi matrix...\n") pi_matrix_syn, avg_pi_matrix_syn = diversity_utils.calculate_pi_matrix( allele_counts_map, passed_sites_map, variant_type='4D', allowed_genes=metaphlan2_genes) # Calculate fixation matrix fixation_matrix_syn = diversity_utils.calculate_fixation_matrix( allele_counts_map, passed_sites_map, variant_type='4D', min_change=min_change, allowed_genes=metaphlan2_genes) sys.stderr.write("Done!\n") # Calculate full matrix of nonsynonymous pairwise differences sys.stderr.write("Calculate nonsynonymous pi matrix...\n") # Calculate allele count matrices pi_matrix_non, avg_pi_matrix_non = diversity_utils.calculate_pi_matrix( allele_counts_map, passed_sites_map, variant_type='1D', allowed_genes=metaphlan2_genes) # Calculate fixation matrix fixation_matrix_non = diversity_utils.calculate_fixation_matrix(
# Read the next chunk of SNP data. parse_snps is handed the current
# final_line_number as its starting point and hands back an updated one.
sys.stderr.write("Loading chunk starting @ %d...\n" % final_line_number)
(snp_samples,
 allele_counts_map,
 passed_sites_map,
 final_line_number) = parse_midas_data.parse_snps(
    species_name,
    debug=debug,
    allowed_variant_types=allowed_variant_types,
    allowed_samples=largest_clade_samples,
    allowed_genes=core_genes,
    chunk_size=chunk_size,
    initial_line_number=final_line_number)
sys.stderr.write("Done! Loaded %d genes\n" % len(allele_counts_map))

# Per-chunk difference / opportunity matrices, restricted to core genes.
sys.stderr.write("Calculating matrix of snp differences...\n")
(chunk_snp_difference_matrix,
 chunk_snp_opportunity_matrix) = diversity_utils.calculate_fixation_matrix(
    allele_counts_map,
    passed_sites_map,
    allowed_genes=core_genes,
    min_change=min_change)
sys.stderr.write("Done!\n")

# The running totals have no rows yet, so size every accumulator from the
# shape of this chunk's result.
# NOTE(review): the original indentation was lost; all six allocations are
# assumed to belong to this branch -- confirm against the source file.
if snp_difference_matrix.shape[0] == 0:
    # "* 1.0" is kept from the original; presumably it forces a
    # floating-point accumulator.
    snp_difference_matrix = numpy.zeros_like(
        chunk_snp_difference_matrix) * 1.0
    snp_opportunity_matrix = numpy.zeros_like(snp_difference_matrix) * 1.0

    # Each accumulator gets its own array (no chained assignment, which
    # would make them aliases of one another).
    synonymous_difference_matrix = numpy.zeros_like(snp_difference_matrix)
    synonymous_opportunity_matrix = numpy.zeros_like(snp_difference_matrix)
    nonsynonymous_difference_matrix = numpy.zeros_like(
        snp_difference_matrix)
    nonsynonymous_opportunity_matrix = numpy.zeros_like(
        snp_difference_matrix)
sys.stderr.write("Analyzing %d haploid samples...\n" % len(desired_samples))
species_idx += 1

# Load SNP information for species_name, limited to the desired samples.
# The returned sample list is not used afterwards in this fragment.
sys.stderr.write("Loading %s...\n" % species_name)
(dummy_samples,
 allele_counts_map,
 passed_sites_map) = parse_midas_data.parse_snps(
    species_name,
    debug=debug,
    allowed_samples=desired_samples)
sys.stderr.write("Done!\n")

# Three fixation matrices from the same allele counts: 4D sites only
# (*_syn), 1D sites only (*_non), and no variant-type restriction (*_all).
sys.stderr.write("Calculating 4D fixation matrix...\n")
(fixation_matrix_syn,
 fixation_opportunities_syn) = diversity_utils.calculate_fixation_matrix(
    allele_counts_map,
    passed_sites_map,
    allowed_variant_types={'4D'},
    min_change=min_change)

sys.stderr.write("Calculating 1D fixation matrix...\n")
(fixation_matrix_non,
 fixation_opportunities_non) = diversity_utils.calculate_fixation_matrix(
    allele_counts_map,
    passed_sites_map,
    allowed_variant_types={'1D'},
    min_change=min_change)

sys.stderr.write("Calculating total fixation matrix...\n")
(fixation_matrix_all,
 fixation_opportunities_all) = diversity_utils.calculate_fixation_matrix(
    allele_counts_map,
    passed_sites_map,
    min_change=min_change)
sys.stderr.write("Done!\n")

# Calculate fraction nonsynonymous
dummy_samples, allele_counts_map, passed_sites_map, final_line_number = parse_midas_data.parse_snps( species_name, debug=debug, allowed_samples=snp_samples, allowed_genes=core_genes, chunk_size=chunk_size, initial_line_number=final_line_number) sys.stderr.write("Done! Loaded %d genes\n" % len(allele_counts_map.keys())) print len(dummy_samples), "dummy samples!" # Calculate fixation matrix sys.stderr.write("Calculating matrix of snp differences...\n") chunk_snp_difference_matrix, chunk_snp_opportunity_matrix = diversity_utils.calculate_fixation_matrix( allele_counts_map, passed_sites_map, min_change=min_change, allowed_genes=core_genes, allowed_variant_types=allowed_variant_types) # sys.stderr.write("Done!\n") if snp_difference_matrix.shape[0] == 0: snp_difference_matrix = numpy.zeros_like( chunk_snp_difference_matrix) * 1.0 snp_opportunity_matrix = numpy.zeros_like(snp_difference_matrix) * 1.0 snp_difference_matrix += chunk_snp_difference_matrix snp_opportunity_matrix += chunk_snp_opportunity_matrix sys.stderr.write("Calculating singletons...\n") chunk_singletons = diversity_utils.calculate_singletons( allele_counts_map, passed_sites_map, allowed_genes=core_genes)
species_name, debug) sys.stderr.write("Done!\n") median_coverages = numpy.array( [sample_coverage_map[samples[i]] for i in xrange(0, len(samples))]) # Calculate full matrix of synonymous pairwise differences sys.stderr.write("Calculate synonymous pi matrix...\n") pi_matrix_syn, avg_pi_matrix_syn = diversity_utils.calculate_pi_matrix( allele_counts_map, passed_sites_map, variant_type='4D') pis = numpy.diag(pi_matrix_syn) # Calculate fixation matrix fixation_matrix_syn, persite_fixation_matrix_syn = diversity_utils.calculate_fixation_matrix( allele_counts_map, passed_sites_map, variant_type='4D', min_change=min_change) sys.stderr.write("Done!\n") # Calculate full matrix of nonsynonymous pairwise differences sys.stderr.write("Calculate nonsynonymous pi matrix...\n") # Calculate allele count matrices pi_matrix_non, avg_pi_matrix_non = diversity_utils.calculate_pi_matrix( allele_counts_map, passed_sites_map, variant_type='1D') # Calculate fixation matrix fixation_matrix_non, persite_fixation_matrix_non = diversity_utils.calculate_fixation_matrix( allele_counts_map, passed_sites_map, variant_type='1D',