def _get_major_allele(snv):
    '''It returns the allele with the highest count

    Every key of snv.qualifiers['alleles'] is scored with _allele_count and
    the first item of the best scoring key is returned (presumably the keys
    are (allele, kind) tuples, to be confirmed against the callers).
    When several keys share the highest count the first one found wins.
    '''
    alleles = snv.qualifiers['alleles']
    best_allele = None
    best_count = None
    for candidate in alleles:
        count = _allele_count(candidate, alleles)
        #only a strictly higher count replaces the current best one, so the
        #ties are resolved in favor of the allele seen first
        if best_count is not None and not best_count < count:
            continue
        best_count = count
        best_allele = candidate
    return best_allele[0]
def _create_info(self, qualifiers, alternative_alleles):
    '''It creates the INFO bit on the vcf

    qualifiers is the mapping with the snv qualifiers. The items 'alleles',
    'reference_allele', 'mapping_quality', 'quality' and 'read_groups' are
    read from it and, when present and not empty, also 'cap_enzymes'.
    alternative_alleles are the ALT alleles in the order in which they are
    listed.

    It returns the items joined by ';' or '.' when there is nothing to print.
    The items, in this order, are:
        RC: the _allele_count (group_kind='read_groups') of every ALT allele
        AF: the frequency of every ALT allele, RC / (sum of RCs + ref count)
        MQ, BQ: the mapping and base qualities, skipped when they are None
        GC: the number of groups in which every allele index has been found
        GP: 1 - (groups of the allele found in most groups / all groups)
        EZ: the cap enzymes
    AF, GC and GP are skipped when there are no counts to calculate them,
    instead of failing with a ZeroDivisionError or a ValueError.
    '''
    toprint_items = []

    alleles = qualifiers['alleles']

    #RC allele count in genotypes, for each ALT allele, in the same order as
    #listed
    acounts = [_allele_count(allele, alleles, group_kind='read_groups')
               for allele in alternative_alleles]
    if acounts:
        toprint_items.append('RC=%s' % ','.join(map(str, acounts)))

    #AF allele frequency for each ALT allele in the same order as listed:
    #the reference is looked for in the alleles as an (allele, kind) tuple
    reference_allele = qualifiers['reference_allele'], INVARIANT
    if reference_allele in alleles:
        ref_count = _allele_count(reference_allele, alleles,
                                  group_kind='read_groups')
    else:
        ref_count = 0
    total_count = float(sum(acounts) + ref_count)
    #with a total count of 0 there is no frequency to report
    if acounts and total_count:
        afreqs = ['%.1f' % (acount / total_count) for acount in acounts]
        toprint_items.append('AF=%s' % ','.join(afreqs))

    #MQ RMS mapping quality, e.g. MQ=52
    #BQ RMS base quality at this position
    for kind, strfmt in (('mapping_quality', 'MQ=%.2f'),
                         ('quality', 'BQ=%.2f')):
        qual = qualifiers[kind]
        if qual is not None:
            toprint_items.append(strfmt % qual)

    #genotype count
    #we count in how many genotypes every allele has been found.
    allele_counts = self._allele_count_by_group(
                                    alleles=alleles,
                                    reference_allele=reference_allele[0],
                                    alternative_alleles=alternative_alleles,
                                    read_groups=qualifiers['read_groups'],
                                    count_reads=False)
    #an empty result has no allele index to take the max from
    if allele_counts:
        #if some allele is missing there are 0 counts of it
        n_als = max(allele_counts.keys()) + 1
        genotype_counts = [len(allele_counts.get(al, []))
                           for al in range(n_als)]
        #now we print
        toprint_items.append('GC=%s' % ','.join(map(str, genotype_counts)))

        #genotype polymorphism
        #1 - (number_groups_for_the_allele_with_more_groups / number_groups)
        #the counts are sorted by allele index, so the allele with more
        #groups has to be looked for, it is not necessarily the first one
        number_of_groups = sum(genotype_counts)
        if number_of_groups:
            most_common = max(genotype_counts)
            genotype_polymorphism = 1 - most_common / float(number_of_groups)
            toprint_items.append('GP=%.2f' % genotype_polymorphism)

    #cap enzymes
    if 'cap_enzymes' in qualifiers and qualifiers['cap_enzymes']:
        toprint_items.append('EZ=%s' % ','.join(qualifiers['cap_enzymes']))

    if toprint_items:
        return ';'.join(toprint_items)
    return '.'