Esempio n. 1
0
def _get_major_allele(snv):
    '''It returns the most frequent allele of the given snv.

    Every entry found when iterating over snv.qualifiers['alleles'] is
    scored with _allele_count and the entry with the highest score wins.
    When several entries share the highest score the first one found is
    kept. Only the first item of the winning entry is returned.
    '''
    alleles = snv.qualifiers['alleles']
    winner = None
    winner_reads = None
    for candidate in alleles:
        reads = _allele_count(candidate, alleles)
        #only a strictly higher count replaces the current winner
        if winner_reads is not None and not winner_reads < reads:
            continue
        winner, winner_reads = candidate, reads
    return winner[0]
Esempio n. 2
0
    def _create_info(self, qualifiers, alternative_alleles):
        '''It creates the INFO bit on the vcf.

        qualifiers is the mapping with the snv data. The keys 'alleles',
        'reference_allele', 'mapping_quality', 'quality' and 'read_groups'
        are read, 'cap_enzymes' is optional.
        alternative_alleles are the ALT alleles in the order in which they
        are listed in the vcf line.

        It returns the INFO items (RC, AF, MQ, BQ, GC, GP and EZ) joined by
        ';' or '.' when there is nothing to report. An item whose value can
        not be calculated (e.g. a frequency without reads) is left out.
        '''
        toprint_items = []

        alleles = qualifiers['alleles']

        #RC allele count in genotypes, for each ALT allele, in the same order as
        #listed
        acounts = [_allele_count(allele, alleles, group_kind='read_groups')
                   for allele in alternative_alleles]
        if acounts:
            toprint_items.append('RC=%s' % ','.join(map(str, acounts)))

        #AF allele frequency for each ALT allele in the same order as listed:
        #the reference is looked up in alleles as an (allele, kind) tuple
        reference_allele = (qualifiers['reference_allele'], INVARIANT)
        if reference_allele in alleles:
            ref_count = _allele_count(reference_allele, alleles,
                                      group_kind='read_groups')
        else:
            ref_count = 0
        total_count = float(sum(acounts) + ref_count)
        #without reads the frequency is undefined, so AF is left out instead
        #of failing with a ZeroDivisionError
        if acounts and total_count:
            afreqs = ['%.1f' % (acount / total_count) for acount in acounts]
            toprint_items.append('AF=%s' % ','.join(afreqs))

        #MQ RMS mapping quality, e.g. MQ=52
        #BQ RMS base quality at this position
        for kind, strfmt in (('mapping_quality', 'MQ=%.2f'),
                             ('quality', 'BQ=%.2f')):
            qual = qualifiers[kind]
            if qual is not None:
                toprint_items.append(strfmt % qual)

        #genotype count
        #we count in how many genotypes every allele has been found.
        allele_counts = self._allele_count_by_group(
                                    alleles=alleles,
                                    reference_allele=reference_allele[0],
                                    alternative_alleles=alternative_alleles,
                                    read_groups=qualifiers['read_groups'],
                                    count_reads=False)
        #without any counted allele there is no GC or GP to report
        if allele_counts:
            #if some allele is missing there are 0 counts of it
            n_als = max(allele_counts.keys()) + 1
            genotype_counts = [len(allele_counts.get(al, []))
                               for al in range(n_als)]

            #now we print
            toprint_items.append('GC=%s' % ','.join(map(str, genotype_counts)))

            #genotype polymorphism
            #1 - (number_groups_for_the_allele_with_more_groups / number_groups)
            number_of_groups = sum(genotype_counts)
            if number_of_groups:
                #the allele found in more groups is not always the allele 0,
                #so the highest count is the one to use
                major_groups = max(genotype_counts)
                genotype_polymorphism = (1 -
                                     major_groups / float(number_of_groups))
                toprint_items.append('GP=%.2f' % genotype_polymorphism)

        #cap enzymes
        if 'cap_enzymes' in qualifiers and qualifiers['cap_enzymes']:
            to_print = 'EZ=%s' % ','.join(qualifiers['cap_enzymes'])
            toprint_items.append(to_print)

        if toprint_items:
            return ';'.join(toprint_items)
        else:
            return '.'