# Per-cohort accumulators: every cohort in `cohorts` starts with an empty list.
# Nothing in this part of the script appends to them yet.
replacement_shared_snps = {cohort: [] for cohort in cohorts}
replacement_opportunities = {cohort: [] for cohort in cohorts}

# Process each species independently.
for species_name in good_species_list:

    sys.stderr.write("\nProcessing %s...\n" % species_name)

    # First we have to enumerate QP pairs in each cohort.
    # NOTE(review): "QP" presumably means quasi-phaseable -- confirm against
    # diversity_utils.
    sys.stderr.write("Enumerating QP pairs...\n")

    # All samples known to sample_order_map (not filtered by species).
    all_samples = sample_order_map.keys()

    # Set of samples that meet coverage criteria for this species.
    highcoverage_samples = set(
        diversity_utils.calculate_highcoverage_samples(species_name))

    # Set of samples that meet QP criteria for this species (the helper calls
    # them "haploid" samples).
    haploid_samples = set(
        diversity_utils.calculate_haploid_samples(species_name))

    # Disabled debugging output (Python 2 print statement):
    #print len(all_samples), len(highcoverage_samples), len(haploid_samples)

    # Skip species with fewer haploid samples than the configured minimum.
    if len(haploid_samples) < config.within_host_min_haploid_sample_size:
        continue

    # Pair indices computed over all samples, returned as three groups.
    # NOTE(review): judging by the names these are same-sample, same-subject
    # and different-subject pairs -- confirm the exact structure in
    # sample_utils.calculate_ordered_subject_pairs.
    same_sample_idxs, same_subject_idxs, diff_subject_idxs = sample_utils.calculate_ordered_subject_pairs(
        sample_order_map, all_samples)

    # Counter reset for each species.
    # NOTE(review): presumably counts samples from the HMP cohort -- confirm
    # where it is incremented.
    hmp_sample_size = 0
コード例 #2
0
    # Choose which species to process: start from the full "good species"
    # list; any `species` value other than 'all' replaces it with that single
    # species. In debug mode the full list is cut to its first three entries.
    good_species_list = parse_midas_data.parse_good_species_list()
    if species != 'all':
        good_species_list = [species]
    else:
        if debug:
            good_species_list = good_species_list[:3]

    # Accumulator for output record strings; starts empty.
    # NOTE(review): the original comment described this as the header for the
    # output file, but no header is added here -- confirm where it is written.
    record_strs = []

    for species_name in good_species_list:

        sys.stderr.write("Loading samples...\n")

        # Only keep samples above the `min_coverage` depth threshold.
        # NOTE(review): the original comment also said "plot" and "confidently
        # phaseable"; neither plotting nor a phaseability filter appears in
        # this call -- confirm.
        snp_samples = diversity_utils.calculate_highcoverage_samples(
            species_name, min_coverage=min_coverage)

        # Skip species with fewer than two qualifying samples.
        if len(snp_samples) < 2:
            continue

        sys.stderr.write("found %d samples\n" % len(snp_samples))

        # Analyze SNPs, looping over chunk sizes.
        # Clunky, but necessary to limit memory usage on cluster

        # Load SNP information for species_name
        sys.stderr.write("Loading SNPs for %s...\n" % species_name)

        # Per-species containers, reset on every iteration.
        snps = []
        snp_map = {}  # contig: list of locations map