# Read the HMP subject/sample metadata, reporting progress on stderr.
sys.stderr.write("Loading HMP metadata...\n")
subject_sample_map = parse_midas_data.parse_subject_sample_map()
sys.stderr.write("Done!\n")

# Read the time metadata for all samples; the pruned version of this map
# is derived later, once per-sample coverage is known.
subject_sample_time_map_all_samples = (
    parse_midas_data.parse_subject_sample_time_map())

######################
# Load coverage data #
######################

# Genomic coverage histograms for this species, together with the list of
# samples they were parsed for.
sample_coverage_histograms, samples = (
    parse_midas_data.parse_coverage_distribution(species_name))

# Reduce every histogram to its median coverage.
median_coverages = numpy.array([
    stats_utils.calculate_median_from_histogram(sample_coverage_histogram)
    for sample_coverage_histogram in sample_coverage_histograms
])

# Lookup table from sample to median coverage; entries are paired by
# position in `samples` / `median_coverages`.
sample_coverage_map = {
    sample: median_coverages[idx]
    for idx, sample in enumerate(samples)
}

# Prune the time metadata: for subjects with more than one sample at a
# time point, only the highest-coverage sample is retained.
subject_sample_time_map = parse_midas_data.prune_subject_sample_time_map(
    subject_sample_time_map_all_samples, sample_coverage_map)

###############################################################
# Compute Pi within patients to figure out which are haploid  #
from parsers import parse_midas_data

# The species to analyse is the first command-line argument. Exit with a
# readable usage message (status 1) instead of an uncaught IndexError when
# it is missing.
if len(sys.argv) < 2:
    sys.stderr.write("Usage: %s species_name\n" % sys.argv[0])
    sys.exit(1)
species_name = sys.argv[1]

# Threshold constant; it is not used in this part of the script.
# NOTE(review): presumably a minimum allele-frequency change -- confirm
# against the code that consumes it.
min_change = 0.8

# Load the list of metaphlan2 genes for this species.
metaphlan2_genes = parse_midas_data.load_metaphlan2_genes(species_name)

# Read the HMP subject/sample metadata, reporting progress on stderr.
sys.stderr.write("Loading HMP metadata...\n")
subject_sample_map = parse_midas_data.parse_subject_sample_map()
sys.stderr.write("Done!\n")

# Genomic coverage histograms for this species (combination_type="sample"),
# together with the list of samples they were parsed for.
sample_coverage_histograms, samples = (
    parse_midas_data.parse_coverage_distribution(
        species_name, combination_type="sample"))

# Reduce every histogram to its median coverage.
median_coverages = numpy.array([
    stats_utils.calculate_median_from_histogram(sample_coverage_histogram)
    for sample_coverage_histogram in sample_coverage_histograms
])

# Lookup table from sample to median coverage; entries are paired by
# position in `samples` / `median_coverages`.
sample_coverage_map = {
    sample: median_coverages[idx]
    for idx, sample in enumerate(samples)
}

# Parse the SNP data for species_name, reporting progress on stderr.
# Note: `samples` is rebound here to the sample list returned by parse_snps.
sys.stderr.write("Loading %s...\n" % species_name)
samples, allele_counts_map, passed_sites_map = parse_midas_data.parse_snps(
    species_name,
    combination_type="sample",
    debug=False,
)
sys.stderr.write("Done!\n")