def multiset_banded_genotype_combinations(sample_genotypes, bandwidth):
    """Yield per-sample genotype selections for every banded index assignment.

    For each multiset of indexes drawn from ``range(bandwidth)`` (one index
    per entry of ``sample_genotypes``, produced by ``multiset.multichoose``)
    and for each permutation of that multiset (``multiset.permutations``),
    yields a list containing ``genotypes[index]`` for every entry of
    ``sample_genotypes``, in order.

    Args:
        sample_genotypes: sized sequence whose entries are indexed directly
            with the chosen index.  NOTE(review): the sibling
            banded_genotype_combinations takes (sample, genotypes) pairs;
            here each entry is indexed as-is -- confirm what callers pass.
        bandwidth: indexes are drawn from ``range(bandwidth)``.

    Yields:
        list with one selected element per entry of ``sample_genotypes``.
    """
    # The number of indexes must equal the number of entries being zipped
    # against below.  The previous code used an undefined name (`samples`).
    sample_count = len(sample_genotypes)
    for index_combo in multiset.multichoose(sample_count, range(bandwidth)):
        for index_permutation in multiset.permutations(index_combo):
            yield [
                genotypes[index]
                for index, genotypes in zip(index_permutation, sample_genotypes)
            ]
def data_likelihood_exact(genotype, observed_alleles):
    """'Exact' data likelihood of the observations given a genotype.

    Sums, over all possible underlying 'true allele' combinations, the
    sampling probability times the joint Q score for the observed alleles.
    Each term is formed by adding the value from ``multinomialln`` to the
    value from ``likelihood_given_true_alleles``; the terms are then reduced
    with ``logsumexp``, and that reduced value is returned.

    Args:
        genotype: re-iterable collection of ``(allele, count)`` pairs.  The
            counts are summed to get the ploidy and normalised into
            per-allele sampling probabilities.
        observed_alleles: sized collection of allele observations, passed
            through to ``likelihood_given_true_alleles``.  Presumably JSON
            allele observation records keyed by 'alt' -- confirm with caller.

    Returns:
        The ``logsumexp`` of the per-combination terms.
    """
    observation_count = len(observed_alleles)
    ploidy = sum(count for _, count in genotype)
    # float() keeps this a true division under Python 2 as well.
    allele_probs = [count / float(ploidy) for _, count in genotype]
    genotype_alleles = [entry[0] for entry in genotype]

    probs = []
    # for all true allele combinations X permutations
    for true_allele_combination in multiset.multichoose(observation_count,
                                                        genotype_alleles):
        for true_allele_permutation in multiset.permutations(
                true_allele_combination):
            # this mapping allows us to use sampling_prob the same way as we
            # do when we use JSON allele observation records
            true_alleles = [{'alt': allele}
                            for allele in true_allele_permutation]
            allele_groups = group_alleles(true_alleles)
            # Number of true alleles of each genotype allele, in genotype
            # order; alleles absent from the grouping count as zero.
            # (`in` replaces dict.has_key, which no longer exists in Python 3.)
            observations = [
                len(allele_groups[allele]) if allele in allele_groups else 0
                for allele in genotype_alleles
            ]
            # Alternative left by the original author:
            # dirichlet_maximum_likelihood_ratio(allele_probs, observations)
            lnsampling_prob = multinomialln(allele_probs, observations)
            probs.append(
                lnsampling_prob
                + likelihood_given_true_alleles(observed_alleles, true_alleles))

    # sum the individual probability of all combinations
    return logsumexp(probs)
def banded_genotype_combinations(sample_genotypes, bandwidth, band_depth):
    """Yield genotype combinations that deviate from the best case in bands.

    The first combination always pairs every sample with its first listed
    genotype (index 0).  After that, for every band index ``i`` in
    ``range(1, bandwidth)`` and every depth ``j`` in ``range(1, band_depth)``,
    ``j`` index slots are set to ``i`` and the remaining slots to 0, and one
    combination is yielded per arrangement returned by
    ``multiset.permutations``.

    Args:
        sample_genotypes: sized sequence of ``(sample, genotypes)`` pairs,
            where ``genotypes`` is indexable.
        bandwidth: band indexes 1 .. bandwidth-1 are explored.
        band_depth: 1 .. band_depth-1 slots are moved to the band index at
            a time.

    Yields:
        list of ``(sample, genotype)`` tuples, one per sample.
    """
    # always provide the 'best' case
    best_case = []
    for sample, genotypes in sample_genotypes:
        best_case.append((sample, genotypes[0]))
    yield best_case

    for band in range(1, bandwidth):
        for depth in range(1, band_depth):
            # `depth` slots take the band index, the rest stay at index 0
            remaining = len(sample_genotypes) - depth
            indexes = [band] * depth + [0] * remaining
            for assignment in multiset.permutations(indexes):
                combination = []
                for index, (sample, genotypes) in zip(assignment,
                                                      sample_genotypes):
                    combination.append((sample, genotypes[index]))
                yield combination
def banded_genotype_combinations(sample_genotypes, bandwidth, band_depth):
    """Enumerate banded genotype combinations, best case first.

    ``sample_genotypes`` is a sized sequence of ``(sample, genotypes)``
    pairs.  The generator first yields the combination that takes
    ``genotypes[0]`` for every sample.  It then walks band indexes
    ``1 .. bandwidth-1`` and depths ``1 .. band_depth-1``: for each pair it
    builds an index list with ``depth`` copies of the band index followed by
    zeros, and yields one combination for every arrangement of that list
    produced by ``multiset.permutations``.

    Each yielded combination is a list of ``(sample, genotype)`` tuples in
    the order of ``sample_genotypes``.
    """

    def select(index_assignment):
        # Pair each sample with the genotype found at its assigned index.
        return [
            (sample, genotypes[index])
            for index, (sample, genotypes) in zip(index_assignment,
                                                  sample_genotypes)
        ]

    # always provide the 'best' case
    yield [(sample, genotypes[0]) for sample, genotypes in sample_genotypes]

    for band_index in range(1, bandwidth):
        # band_depth is the depth to which we explore the bandwidth
        for moved in range(1, band_depth):
            zeros = (len(sample_genotypes) - moved) * [0]
            indexes = moved * [band_index] + zeros
            for index_permutation in multiset.permutations(indexes):
                yield select(index_permutation)
def data_likelihood_exact(genotype, observed_alleles):
    """'Exact' data likelihood of the observations given a genotype.

    Sum of sampling probability * joint Q score for the observed alleles over
    all possible underlying 'true allele' combinations.  Each term is the
    value from ``multinomialln`` plus the value from
    ``likelihood_given_true_alleles``; the terms are reduced with
    ``logsumexp``, and that reduced value is returned.

    Args:
        genotype: re-iterable collection of ``(allele, count)`` pairs; the
            counts are summed to obtain the ploidy and divided by it to get
            per-allele sampling probabilities.
        observed_alleles: sized collection of allele observations, forwarded
            unchanged to ``likelihood_given_true_alleles``.  Presumably
            records keyed by 'alt' -- confirm with caller.

    Returns:
        The ``logsumexp`` of the per-combination terms.
    """
    observation_count = len(observed_alleles)
    ploidy = sum(count for _, count in genotype)
    # float() keeps this a true division under Python 2 as well.
    allele_probs = [count / float(ploidy) for _, count in genotype]
    genotype_alleles = [entry[0] for entry in genotype]

    probs = []
    # for all true allele combinations X permutations
    for true_allele_combination in multiset.multichoose(observation_count,
                                                        genotype_alleles):
        for true_allele_permutation in multiset.permutations(
                true_allele_combination):
            # this mapping allows us to use sampling_prob the same way as we
            # do when we use JSON allele observation records
            true_alleles = [{'alt': allele}
                            for allele in true_allele_permutation]
            allele_groups = group_alleles(true_alleles)
            # Per genotype allele, how many true alleles fell in its group;
            # zero when the allele has no group.  Membership is tested with
            # `in` because dict.has_key was removed in Python 3.
            observations = [
                len(allele_groups[allele]) if allele in allele_groups else 0
                for allele in genotype_alleles
            ]
            # Alternative left by the original author:
            # dirichlet_maximum_likelihood_ratio(allele_probs, observations)
            lnsampling_prob = multinomialln(allele_probs, observations)
            probs.append(
                lnsampling_prob
                + likelihood_given_true_alleles(observed_alleles, true_alleles))

    # sum the individual probability of all combinations
    return logsumexp(probs)
def multiset_banded_genotype_combinations(sample_genotypes, bandwidth):
    """Yield per-sample genotype selections for every banded index assignment.

    Draws every multiset of indexes from ``range(bandwidth)`` with one index
    per entry of ``sample_genotypes`` (``multiset.multichoose``), expands each
    multiset into its permutations (``multiset.permutations``), and for each
    permutation yields the list of ``genotypes[index]`` taken from the
    entries of ``sample_genotypes`` in order.

    Args:
        sample_genotypes: sized sequence whose entries are indexed directly
            with the chosen index.  NOTE(review): confirm whether callers
            pass bare genotype lists or (sample, genotypes) pairs.
        bandwidth: indexes are drawn from ``range(bandwidth)``.

    Yields:
        list with one selected element per entry of ``sample_genotypes``.
    """
    # One index per entry of sample_genotypes; the previous code referenced
    # an undefined name (`samples`) here.
    sample_count = len(sample_genotypes)
    for index_combo in multiset.multichoose(sample_count, range(bandwidth)):
        for index_permutation in multiset.permutations(index_combo):
            yield [
                genotypes[index]
                for index, genotypes in zip(index_permutation, sample_genotypes)
            ]