# Walk through the SNP data in chunks and accumulate the SNP difference and
# opportunity matrices over all chunks.
# NOTE(review): the loop ends once parse_snps hands back a negative line
# number -- presumably its end-of-input signal; confirm in parse_midas_data.
final_line_number = 0
while final_line_number >= 0:

    sys.stderr.write("Loading chunk starting @ %d...\n" % final_line_number)
    # Resume parsing at the line where the previous call stopped; the call
    # also returns the line number that the next iteration will start from.
    dummy_samples, allele_counts_map, passed_sites_map, final_line_number = parse_midas_data.parse_snps(
        species_name,
        debug=debug,
        allowed_samples=snp_samples,
        chunk_size=chunk_size,
        initial_line_number=final_line_number)
    sys.stderr.write("Done! Loaded %d genes\n" % len(allele_counts_map.keys()))

    # Calculate fixation matrix
    sys.stderr.write("Calculating matrix of snp differences...\n")
    chunk_snp_difference_matrix, chunk_snp_opportunity_matrix = diversity_utils.calculate_fixation_matrix(
        allele_counts_map, passed_sites_map, min_change=min_change)
    sys.stderr.write("Done!\n")

    # snp_difference_matrix is created before this fragment.  A zero-length
    # first axis is taken to mean "not sized yet", so both running totals are
    # allocated here with the shape of this chunk's matrix; the `* 1.0`
    # promotes an integer matrix to floating point.
    if snp_difference_matrix.shape[0] == 0:
        snp_difference_matrix = numpy.zeros_like(
            chunk_snp_difference_matrix) * 1.0
        snp_opportunity_matrix = numpy.zeros_like(snp_difference_matrix) * 1.0

    # Add this chunk's contribution to the running totals (in place).
    snp_difference_matrix += chunk_snp_difference_matrix
    snp_opportunity_matrix += chunk_snp_opportunity_matrix

    sys.stderr.write("Calculating genotype matrix for chunk...\n")
    for gene_name in allele_counts_map.keys():
        for variant_type in allele_counts_map[gene_name].keys():

            if len(allele_counts_map[gene_name][variant_type]['alleles']) == 0:
# Look up each sample's entry in sample_coverage_map, in the order of
# `samples`, and pack the values into an array.
# NOTE(review): xrange makes this Python 2 only.
median_coverages = numpy.array(
    [sample_coverage_map[samples[i]] for i in xrange(0, len(samples))])

# Calculate full matrix of synonymous pairwise differences
# (variant_type='4D', restricted to the genes in metaphlan2_genes)
sys.stderr.write("Calculate synonymous pi matrix...\n")
pi_matrix_syn, avg_pi_matrix_syn = diversity_utils.calculate_pi_matrix(
    allele_counts_map,
    passed_sites_map,
    variant_type='4D',
    allowed_genes=metaphlan2_genes)

# Calculate fixation matrix
# NOTE(review): the result is bound to a single name here.  If
# calculate_fixation_matrix returns a (matrix, opportunities) pair in the
# diversity_utils version in use, this name holds the whole tuple -- confirm.
fixation_matrix_syn = diversity_utils.calculate_fixation_matrix(
    allele_counts_map,
    passed_sites_map,
    variant_type='4D',
    min_change=min_change,
    allowed_genes=metaphlan2_genes)

sys.stderr.write("Done!\n")

# Calculate full matrix of nonsynonymous pairwise differences
# (same call as above, but with variant_type='1D')
sys.stderr.write("Calculate nonsynonymous pi matrix...\n")
# Calculate allele count matrices
pi_matrix_non, avg_pi_matrix_non = diversity_utils.calculate_pi_matrix(
    allele_counts_map,
    passed_sites_map,
    variant_type='1D',
    allowed_genes=metaphlan2_genes)
# Calculate fixation matrix
fixation_matrix_non = diversity_utils.calculate_fixation_matrix(
    # Load one chunk of SNP data and, on first use, allocate the running
    # matrices.  NOTE(review): the indentation suggests this is the body of a
    # loop whose header lies outside this fragment -- confirm.
    sys.stderr.write("Loading chunk starting @ %d...\n" % final_line_number)
    # The chunk is restricted to allowed_variant_types, largest_clade_samples
    # and core_genes.  The returned sample list overwrites snp_samples, and
    # the returned line number is where the next call will resume.
    snp_samples, allele_counts_map, passed_sites_map, final_line_number = parse_midas_data.parse_snps(
        species_name,
        debug=debug,
        allowed_variant_types=allowed_variant_types,
        allowed_samples=largest_clade_samples,
        allowed_genes=core_genes,
        chunk_size=chunk_size,
        initial_line_number=final_line_number)
    sys.stderr.write("Done! Loaded %d genes\n" % len(allele_counts_map.keys()))

    # Calculate fixation matrix
    sys.stderr.write("Calculating matrix of snp differences...\n")
    chunk_snp_difference_matrix, chunk_snp_opportunity_matrix = diversity_utils.calculate_fixation_matrix(
        allele_counts_map,
        passed_sites_map,
        allowed_genes=core_genes,
        min_change=min_change)
    sys.stderr.write("Done!\n")

    # snp_difference_matrix is created before this fragment.  A zero-length
    # first axis is taken to mean "not sized yet", so every accumulator is
    # allocated here with the shape of this chunk's matrix.  The `* 1.0`
    # promotes an integer matrix to floating point; the four (non)synonymous
    # matrices are built with zeros_like on the promoted matrix and therefore
    # share its dtype.
    if snp_difference_matrix.shape[0] == 0:
        snp_difference_matrix = numpy.zeros_like(
            chunk_snp_difference_matrix) * 1.0
        snp_opportunity_matrix = numpy.zeros_like(snp_difference_matrix) * 1.0
        synonymous_difference_matrix = numpy.zeros_like(snp_difference_matrix)
        synonymous_opportunity_matrix = numpy.zeros_like(snp_difference_matrix)
        nonsynonymous_difference_matrix = numpy.zeros_like(
            snp_difference_matrix)
        nonsynonymous_opportunity_matrix = numpy.zeros_like(
            snp_difference_matrix)
Beispiel #4
0
        sys.stderr.write("Analyzing %d haploid samples...\n" %
                         len(desired_samples))

    # Increment species_idx (initialised outside this fragment).
    species_idx += 1

    # Load SNP information for species_name
    # Here the data is loaded in a single call: no chunk_size or
    # initial_line_number is passed, and three values are unpacked.
    sys.stderr.write("Loading %s...\n" % species_name)
    dummy_samples, allele_counts_map, passed_sites_map = parse_midas_data.parse_snps(
        species_name, debug=debug, allowed_samples=desired_samples)
    sys.stderr.write("Done!\n")

    # Calculate fixation matrices
    # Three calls over the same allele counts: restricted to '4D' variants,
    # restricted to '1D' variants, and with no variant-type restriction.
    # Each call yields a fixation matrix and a matching opportunities matrix.
    sys.stderr.write("Calculating 4D fixation matrix...\n")
    fixation_matrix_syn, fixation_opportunities_syn = diversity_utils.calculate_fixation_matrix(
        allele_counts_map,
        passed_sites_map,
        allowed_variant_types=set(['4D']),
        min_change=min_change)
    sys.stderr.write("Calculating 1D fixation matrix...\n")
    fixation_matrix_non, fixation_opportunities_non = diversity_utils.calculate_fixation_matrix(
        allele_counts_map,
        passed_sites_map,
        allowed_variant_types=set(['1D']),
        min_change=min_change)
    sys.stderr.write("Calculating total fixation matrix...\n")
    fixation_matrix_all, fixation_opportunities_all = diversity_utils.calculate_fixation_matrix(
        allele_counts_map, passed_sites_map, min_change=min_change)

    sys.stderr.write("Done!\n")

    # Calculate fraction nonsynonymous
    # Load the next chunk of SNP data, restricted to snp_samples and
    # core_genes, resuming at final_line_number.  The call also returns the
    # line number from which the following chunk will be read.
    dummy_samples, allele_counts_map, passed_sites_map, final_line_number = parse_midas_data.parse_snps(
        species_name,
        debug=debug,
        allowed_samples=snp_samples,
        allowed_genes=core_genes,
        chunk_size=chunk_size,
        initial_line_number=final_line_number)
    sys.stderr.write("Done! Loaded %d genes\n" % len(allele_counts_map))

    # Diagnostic: report how many samples came back.  Written to stderr like
    # every other progress message in this block, so it does not mix into
    # stdout and does not rely on the Python-2-only print statement.
    sys.stderr.write("%d dummy samples!\n" % len(dummy_samples))

    # Calculate fixation matrix for this chunk, restricted to core_genes and
    # allowed_variant_types.
    sys.stderr.write("Calculating matrix of snp differences...\n")
    chunk_snp_difference_matrix, chunk_snp_opportunity_matrix = diversity_utils.calculate_fixation_matrix(
        allele_counts_map,
        passed_sites_map,
        min_change=min_change,
        allowed_genes=core_genes,
        allowed_variant_types=allowed_variant_types)
    sys.stderr.write("Done!\n")

    # snp_difference_matrix is created before this fragment.  A zero-length
    # first axis is taken to mean "not sized yet", so both running totals are
    # allocated here with the shape of this chunk's matrix; the `* 1.0`
    # promotes an integer matrix to floating point.
    if snp_difference_matrix.shape[0] == 0:
        snp_difference_matrix = numpy.zeros_like(
            chunk_snp_difference_matrix) * 1.0
        snp_opportunity_matrix = numpy.zeros_like(snp_difference_matrix) * 1.0

    # Add this chunk's contribution to the running totals (in place).
    snp_difference_matrix += chunk_snp_difference_matrix
    snp_opportunity_matrix += chunk_snp_opportunity_matrix

    # Singleton calculation for this chunk, again restricted to core_genes.
    sys.stderr.write("Calculating singletons...\n")
    chunk_singletons = diversity_utils.calculate_singletons(
        allele_counts_map, passed_sites_map, allowed_genes=core_genes)
Beispiel #6
0
    species_name, debug)
sys.stderr.write("Done!\n")

# Look up each sample's entry in sample_coverage_map, in the order of
# `samples`, and pack the values into an array.
# NOTE(review): xrange makes this Python 2 only.
median_coverages = numpy.array(
    [sample_coverage_map[samples[i]] for i in xrange(0, len(samples))])

# Calculate full matrix of synonymous pairwise differences
# (variant_type='4D')
sys.stderr.write("Calculate synonymous pi matrix...\n")
pi_matrix_syn, avg_pi_matrix_syn = diversity_utils.calculate_pi_matrix(
    allele_counts_map, passed_sites_map, variant_type='4D')
# For a 2-D pi_matrix_syn, numpy.diag extracts its main diagonal.
# NOTE(review): presumably the within-sample values -- confirm.
pis = numpy.diag(pi_matrix_syn)

# Calculate fixation matrix
# Two results are unpacked here: the fixation matrix and a second matrix
# stored as persite_fixation_matrix_syn.
fixation_matrix_syn, persite_fixation_matrix_syn = diversity_utils.calculate_fixation_matrix(
    allele_counts_map,
    passed_sites_map,
    variant_type='4D',
    min_change=min_change)

sys.stderr.write("Done!\n")

# Calculate full matrix of nonsynonymous pairwise differences
# (same call as above, but with variant_type='1D')
sys.stderr.write("Calculate nonsynonymous pi matrix...\n")
# Calculate allele count matrices
pi_matrix_non, avg_pi_matrix_non = diversity_utils.calculate_pi_matrix(
    allele_counts_map, passed_sites_map, variant_type='1D')
# Calculate fixation matrix
fixation_matrix_non, persite_fixation_matrix_non = diversity_utils.calculate_fixation_matrix(
    allele_counts_map,
    passed_sites_map,
    variant_type='1D',