sys.stderr.write("Calculating unique samples...\n") # Only consider one sample per person snp_samples = snp_samples[parse_midas_data.calculate_unique_samples( subject_sample_map, sample_list=snp_samples)] if len(snp_samples) < min_sample_size: sys.stderr.write("Not enough unique samples!\n") continue # Load divergence matrices sys.stderr.write("Loading pre-computed substitution rates for %s...\n" % species_name) substitution_rate_map = calculate_substitution_rates.load_substitution_rate_map( species_name) sys.stderr.write("Calculating matrices...\n") dummy_samples, syn_difference_matrix, syn_opportunity_matrix = calculate_substitution_rates.calculate_matrices_from_substitution_rate_map( substitution_rate_map, '4D', allowed_samples=snp_samples) dummy_samples, non_difference_matrix, non_opportunity_matrix = calculate_substitution_rates.calculate_matrices_from_substitution_rate_map( substitution_rate_map, '1D', allowed_samples=snp_samples) snp_samples = dummy_samples syn_differences[species_name] = [] syn_pseudocounts[species_name] = [] syn_opportunities[species_name] = [] non_differences[species_name] = [] non_pseudocounts[species_name] = [] non_opportunities[species_name] = [] for i in xrange(0, syn_difference_matrix.shape[0]): for j in xrange(i + 1, syn_difference_matrix.shape[0]):
snp_samples = [] for sample_name in haploid_samples: if sample_country_map[sample_name] == 'United Kingdom': snp_samples.append(sample_name) if len(snp_samples) < 10: sys.stderr.write("Not enough unique samples!\n") continue # Load divergence matrices sys.stderr.write("Loading pre-computed substitution rates for %s...\n" % species_name) substitution_rate_map = calculate_substitution_rates.load_substitution_rate_map( species_name) sys.stderr.write("Calculating matrix...\n") dummy_samples, snp_difference_matrix, snp_opportunity_matrix = calculate_substitution_rates.calculate_matrices_from_substitution_rate_map( substitution_rate_map, 'core', allowed_samples=snp_samples) snp_samples = dummy_samples sys.stderr.write("Done!\n") snp_substitution_matrix = snp_difference_matrix * 1.0 / ( snp_opportunity_matrix + (snp_opportunity_matrix == 0)) closest_snp_substitution_rates = [] pair_snp_substitution_rates = [] for i in xrange(0, snp_opportunity_matrix.shape[0]): min_substitution_rate = 1e09 for j in xrange(0, snp_opportunity_matrix.shape[0]):
# Build the SNV and gene-content matrices for the current sample set from the
# already-loaded substitution_rate_map.
sys.stderr.write("Calculating SNV matrix...\n")

# SNV changes, split into 'mut' and 'rev' components by the helper, over the
# 'all' variant class.
(dummy_samples,
 snp_mut_difference_matrix,
 snp_rev_difference_matrix,
 snp_mut_opportunity_matrix,
 snp_rev_opportunity_matrix) = (
    calculate_substitution_rates.calculate_mutrev_matrices_from_substitution_rate_map(
        substitution_rate_map, 'all', allowed_samples=snp_samples))
# Adopt the sample list returned alongside the matrices from here on.
snp_samples = dummy_samples

# Same helper applied to 'genes': here the two components are unpacked as
# gene losses and gene gains, in that order.
(gene_samples,
 gene_loss_difference_matrix,
 gene_gain_difference_matrix,
 gene_loss_opportunity_matrix,
 gene_gain_opportunity_matrix) = (
    calculate_substitution_rates.calculate_mutrev_matrices_from_substitution_rate_map(
        substitution_rate_map, 'genes', allowed_samples=snp_samples))

gene_difference_matrices = {'gains': gene_gain_difference_matrix,
                            'losses': gene_loss_difference_matrix}
# The loss opportunity matrix is used as the single gene opportunity matrix.
gene_opportunity_matrix = gene_loss_opportunity_matrix

# Per-variant-type matrices, keyed by variant type.
opportunity_matrices = {}
difference_matrices = {}
for var_type in variant_types:
    dummy_samples, difference_matrix, opportunity_matrix = (
        calculate_substitution_rates.calculate_matrices_from_substitution_rate_map(
            substitution_rate_map, var_type, allowed_samples=snp_samples))
    difference_matrices[var_type] = difference_matrix
    opportunity_matrices[var_type] = opportunity_matrix

# Expose the mut/rev split under the same dictionaries.
difference_matrices['muts'] = snp_mut_difference_matrix
difference_matrices['revs'] = snp_rev_difference_matrix
opportunity_matrices['muts'] = snp_mut_opportunity_matrix
opportunity_matrices['revs'] = snp_rev_opportunity_matrix

# Totals: muts + revs for SNVs, gains + losses for genes.
snp_difference_matrix = snp_mut_difference_matrix + snp_rev_difference_matrix
snp_opportunity_matrix = snp_mut_opportunity_matrix + snp_rev_opportunity_matrix
gene_difference_matrix = gene_gain_difference_matrix + gene_loss_difference_matrix

# Differences per opportunity. Adding (opportunities == 0) puts a 1 in the
# denominator wherever there were no opportunities, avoiding division by
# zero (assumes NumPy arrays, element-wise -- confirm).
snp_substitution_rate = snp_difference_matrix * 1.0 / (
    snp_opportunity_matrix + (snp_opportunity_matrix == 0))