def write(self, sequence, selected_snv_location):
    '''Write the window around the selected snv as one fasta-like record.

    The window covers self._length positions on each side of
    selected_snv_location, clipped to the limits of the sequence. Inside
    the window:
      - the snv that starts at the selected location is written with all
        its alleles between brackets, e.g. [A/T];
      - any other snv is written as the result of _get_major_allele when
        self._maf is not None and calculate_maf_frequency(snv) is above it;
      - otherwise the snv is masked: a SNP is written as whatever
        _snp_to_iupac returns, a DELETION, COMPLEX or INDEL keeps the first
        base of its reference allele followed by Ns, anything else is 'N'.

    The record name is the sequence name plus '_' plus the 1-based position
    of the selected snv inside the window. The record is written to
    self.fhand, which is flushed afterwards.

    Raises NotImplementedError if the selected snv is not a SNP.
    '''
    flank = self._length
    window_start = max(selected_snv_location - flank, 0)
    window_end = min(selected_snv_location + flank + 1, len(sequence))
    window = sequence[window_start:window_end]

    # From here on the selected position is relative to the window.
    target = selected_snv_location - window_start
    maf_threshold = self._maf

    pieces = []
    cursor = 0  # window position right after the last snv written
    for snv in window.get_features(kind='snv'):
        # snv limits are [start, end[. They are shifted by the window start
        # to correct for the slice done above.
        snv_start = snv.location.start.position - window_start
        snv_end = snv.location.end.position - window_start
        # Plain sequence between the previous snv and this one.
        pieces.append(str(window[cursor:snv_start].seq))
        cursor = snv_end

        if snv_start == target:
            # The selected snv: replace it with its alleles.
            if calculate_snv_kind(snv) != SNP:
                msg = "We don't know how to print anything but SNPs"
                raise NotImplementedError(msg)
            allele_names = [key[0] for key in snv.qualifiers['alleles'].keys()]
            pieces.append('[{0:s}]'.format('/'.join(allele_names)))
            continue

        if (maf_threshold is not None
                and calculate_maf_frequency(snv) > maf_threshold):
            # Most abundant allele.
            pieces.append(_get_major_allele(snv))
            continue

        # Masked snv.
        kind = calculate_snv_kind(snv)
        if kind == SNP:
            masked = _snp_to_iupac(snv, window)
        elif kind in (DELETION, COMPLEX, INDEL):
            ref_allele = snv.qualifiers['reference_allele']
            masked = ref_allele[0] + 'N' * (len(ref_allele) - 1)
        else:
            masked = 'N'
        pieces.append(masked)

    # Sequence left after the last snv. window_end is a coordinate of the
    # unsliced sequence, so this stop lies past the end of the window; it
    # assumes the slice clips out-of-range bounds as built-in sequences do.
    pieces.append(str(window[cursor:window_end + 1].seq))

    record_name = window.name + '_' + str(target + 1)
    self.fhand.write('>%s\n%s\n' % (record_name, ''.join(pieces)))
    self.fhand.flush()
def _snv_to_string(snv, sequence, position):
    '''Return the snv written between brackets with all its alleles.

    Examples of the produced strings: [A/T], [-/ATG].

    Arguments:
      snv      -- feature with 'reference_allele' and 'alleles' qualifiers;
                  the keys of 'alleles' are indexed as (allele, kind).
      sequence -- sequence sliced to recover the deleted bases of a DELETION.
      position -- index in sequence where the deleted bases start.

    Returns a (string, size) tuple. size is 1 for a SNP or an INSERTION and
    the length of the deletion allele for a DELETION.

    Alleles whose kind is INVARIANT are ignored. The remaining alleles are
    sorted so the output does not depend on set iteration order; for an
    INSERTION or DELETION the first allele in that order is used.

    Raises RuntimeError for COMPLEX and INDEL snvs (not implemented) and for
    any kind this function does not know how to write.
    Raises IndexError for an INSERTION or DELETION with no allele left after
    dropping the INVARIANT ones.
    '''
    snv_kind = calculate_snv_kind(snv)

    reference_allele = snv.qualifiers['reference_allele']
    alleles = sorted(set(allele[0] for allele in snv.qualifiers['alleles']
                         if allele[1] != INVARIANT))
    assert reference_allele.upper() in ['A', 'T', 'C', 'G']

    if snv_kind == SNP:
        to_print = '%s/%s' % (reference_allele, '/'.join(alleles))
        size = 1
    elif snv_kind == COMPLEX:
        raise RuntimeError('Complex type not implemented')
    elif snv_kind == INDEL:
        # Previously an INDEL with at most one allele fell through and
        # failed later with an UnboundLocalError; no INDEL is supported.
        raise RuntimeError('INDEL type not implemented')
    elif snv_kind == INSERTION:
        size = 1
        to_print = '-/' + alleles[0]
    elif snv_kind == DELETION:
        size = len(alleles[0])
        deleted_alleles = sequence[position:position + size]
        to_print = '%s/-' % ''.join(deleted_alleles)
    else:
        # Fail with a clear message instead of an UnboundLocalError.
        raise RuntimeError('Unknown snv kind: %r' % (snv_kind,))

    return '[%s]' % to_print, size
    def kind_filter(sequence):
        '''Store a 'by_kind' filter result on every snv of the sequence.

        kind is taken from the enclosing scope. An snv that already has a
        'by_kind' result for this kind is left alone; for the others the
        stored result is True when the snv kind differs from kind and False
        when it matches.

        Returns the same sequence, or None when sequence is None.
        '''
        if sequence is None:
            return None
        for snv in sequence.get_features(kind='snv'):
            # Only snvs without a stored result get computed.
            if _get_filter_result(snv, 'by_kind', threshold=kind) is None:
                observed_kind = calculate_snv_kind(snv)
                _add_filter_result(snv, 'by_kind', kind != observed_kind,
                                   threshold=kind)
        return sequence
Example #4
0
def _do_snv_stats(stats, feats, orfs):
    "It adds the ssr stats"

    some_feat = False
    for feat in feats:
        some_feat = True
        type_ = calculate_snv_kind(feat, detailed=True)
        if type_ not in stats["snvs"]["types"]:
            stats["snvs"]["types"][type_] = 0
        stats["snvs"]["types"][type_] += 1

        location = _location_to_orf(orfs, feat)
        if location:
            if location not in stats["snvs"]["locations"]:
                stats["snvs"]["locations"][location] = 0
            stats["snvs"]["locations"][location] += 1
        stats["snvs"]["n_snvs"] += 1
    if some_feat:
        stats["snvs"]["n_seqs"] += 1
def _do_snv_stats(stats, feats, orfs):
    'It adds the ssr stats'

    some_feat = False
    for feat in feats:
        some_feat = True
        type_ = calculate_snv_kind(feat, detailed=True)
        if type_ not in stats['snvs']['types']:
            stats['snvs']['types'][type_] = 0
        stats['snvs']['types'][type_] += 1

        location = _location_to_orf(orfs, feat)
        if location:
            if location not in stats['snvs']['locations']:
                stats['snvs']['locations'][location] = 0
            stats['snvs']['locations'][location] += 1
        stats['snvs']['n_snvs'] += 1
    if some_feat:
        stats['snvs']['n_seqs'] += 1