def write(self, sequence, selected_snv_location):
    """Write the region around one snv as a '>name' / sequence record.

    A window of up to ``self._length`` residues on each side of
    ``selected_snv_location`` is cut out of ``sequence`` (clamped to the
    sequence limits).  The snv that starts at the selected location is
    written as its alleles joined by '/' between square brackets.  Every
    other snv found in the window is written as:

    * the value of ``_get_major_allele`` when ``self._maf`` is not None and
      the snv maf frequency is greater than it,
    * the value of ``_snp_to_iupac`` when it is an SNP,
    * the first residue of its reference allele followed by Ns when it is a
      DELETION, COMPLEX or INDEL,
    * a single 'N' otherwise.

    The record name is the sequence name, an underscore and the one-based
    position of the selected snv inside the window.  The record is written
    to ``self.fhand``, which is flushed afterwards.

    Raises NotImplementedError when the selected snv is not an SNP.
    """
    flank = self._length
    start = max(selected_snv_location - flank, 0)
    end = min(selected_snv_location + flank + 1, len(sequence))

    # From here on everything is relative to the window, not to the
    # complete sequence.
    sequence = sequence[start:end]
    selected_snv_location -= start

    maf_threshold = self._maf
    seq_to_print = ''
    copied_up_to = 0
    for snv in sequence.get_features(kind='snv'):
        # snv limits are [start, end[; the window start is subtracted to
        # account for the slice done above
        snv_start = snv.location.start.position - start
        snv_end = snv.location.end.position - start

        # plain sequence found between the previous snv and this one
        seq_to_print += str(sequence[copied_up_to:snv_start].seq)
        copied_up_to = snv_end

        if snv_start == selected_snv_location:
            # the selected snv is replaced by its alleles
            if calculate_snv_kind(snv) != SNP:
                raise NotImplementedError(
                    "We don't know how to print anything but SNPs")
            alleles = '/'.join(
                [key[0] for key in snv.qualifiers['alleles'].keys()])
            seq_to_print += '[{0:s}]'.format(alleles)
            continue

        if (maf_threshold is not None
                and calculate_maf_frequency(snv) > maf_threshold):
            # most abundant allele
            seq_to_print += _get_major_allele(snv)
            continue

        # ambiguity code or Ns
        snv_kind = calculate_snv_kind(snv)
        if snv_kind == SNP:
            masked = _snp_to_iupac(snv, sequence)
        elif snv_kind in (DELETION, COMPLEX, INDEL):
            ref_allele = snv.qualifiers['reference_allele']
            masked = ref_allele[0] + 'N' * (len(ref_allele) - 1)
        else:
            masked = 'N'
        seq_to_print += masked

    # The loop never breaks, so the tail after the last snv is always
    # appended.  `end` is still expressed in full-sequence coordinates, so
    # this upper bound is never smaller than the window length.
    seq_to_print += str(sequence[copied_up_to:end + 1].seq)

    name = sequence.name + '_' + str(selected_snv_location + 1)
    self.fhand.write('>%s\n%s\n' % (name, seq_to_print))
    self.fhand.flush()
def _snv_to_string(snv, sequence, position):
    """Return the snv written between brackets with all its alleles.

    The formats produced are like [A/T] or [-/ATG].

    Arguments:
    snv -- feature with the 'reference_allele' and 'alleles' qualifiers.
    sequence -- sequence from which the deleted residues are taken.
    position -- index in sequence where the deleted residues start.

    Returns a tuple (text, size):
    * SNP: '[reference/allele/...]' and size 1.
    * INSERTION: '[-/allele]' and size 1.
    * DELETION: '[deleted residues/-]' and the length of the allele; the
      deleted residues are sequence[position:position + size].

    Raises RuntimeError for COMPLEX and INDEL snvs and for any kind that
    is not one of the above.  The reference allele is asserted to be a
    single A, T, C or G (in either case).
    """
    snv_kind = calculate_snv_kind(snv)
    reference_allele = snv.qualifiers['reference_allele']
    # Each key of the alleles qualifier is indexed as (allele, kind); the
    # invariant ones are dropped and the rest are deduplicated.
    alleles = set(allele[0] for allele in snv.qualifiers['alleles'].keys()
                  if allele[1] != INVARIANT)
    assert reference_allele.upper() in ['A', 'T', 'C', 'G']

    if snv_kind == SNP:
        to_print = "%s/%s" % (reference_allele, "/".join(alleles))
        size = 1
    elif snv_kind == COMPLEX:
        raise RuntimeError('Complex type not implemented')
    elif snv_kind == INDEL:
        # An INDEL with a single allele used to fall through and fail with
        # an UnboundLocalError; no INDEL can be written, so say so.
        raise RuntimeError('INDEL type not implemented')
    elif snv_kind == INSERTION:
        allele = list(alleles)[0]
        size = 1
        to_print = '-/' + allele
    elif snv_kind == DELETION:
        allele = list(alleles)[0]
        size = len(allele)
        deleted_alleles = sequence[position:position + size]
        to_print = '%s/-' % ''.join(deleted_alleles)
    else:
        # Any other kind would leave to_print and size undefined.
        raise RuntimeError('Unknown snv kind: %r' % (snv_kind,))

    return '[%s]' % to_print, size
def kind_filter(sequence):
    """Store a 'by_kind' filter result in every snv of the sequence.

    `kind` is a free name here; it is expected to come from the enclosing
    scope, which is not part of this block.  For each snv feature without
    a previous 'by_kind' result for that threshold, the stored result is
    True when the kind of the snv differs from `kind` and False when it
    matches.  Snvs that already have a result are left untouched.

    Returns the same sequence object, or None when sequence is None.
    """
    if sequence is None:
        return None

    for snv in sequence.get_features(kind='snv'):
        # Only the snvs not yet evaluated for this threshold are processed
        if _get_filter_result(snv, 'by_kind', threshold=kind) is None:
            differs = bool(kind != calculate_snv_kind(snv))
            _add_filter_result(snv, 'by_kind', differs, threshold=kind)
    return sequence
def _do_snv_stats(stats, feats, orfs):
    """Add the snvs of one sequence to the snv statistics.

    For every feature in feats it increments, inside stats["snvs"]:
    * the counter in "types" for its detailed snv kind,
    * the counter in "locations" for the value of _location_to_orf, only
      when that value is truthy,
    * "n_snvs".

    "n_seqs" is incremented once when feats yields at least one feature.
    stats is modified in place and nothing is returned.
    """
    n_feats = 0
    # feats may be any iterable, so the features are counted while iterating
    for n_feats, feat in enumerate(feats, 1):
        snv_kind = calculate_snv_kind(feat, detailed=True)
        kind_counts = stats["snvs"]["types"]
        kind_counts[snv_kind] = kind_counts.get(snv_kind, 0) + 1

        orf_location = _location_to_orf(orfs, feat)
        if orf_location:
            location_counts = stats["snvs"]["locations"]
            location_counts[orf_location] = (
                location_counts.get(orf_location, 0) + 1)

        stats["snvs"]["n_snvs"] += 1

    if n_feats:
        stats["snvs"]["n_seqs"] += 1
def _do_snv_stats(stats, feats, orfs):
    """Add the snvs of one sequence to the snv statistics.

    For every feature in feats it increments, inside stats['snvs']:
    * the counter in 'types' for its detailed snv kind,
    * the counter in 'locations' for the value of _location_to_orf, only
      when that value is truthy,
    * 'n_snvs'.

    'n_seqs' is incremented once when feats yields at least one feature.
    stats is modified in place and nothing is returned.
    """
    has_snvs = False
    for snv in feats:
        has_snvs = True

        detailed_kind = calculate_snv_kind(snv, detailed=True)
        by_kind = stats['snvs']['types']
        by_kind[detailed_kind] = by_kind.setdefault(detailed_kind, 0) + 1

        where = _location_to_orf(orfs, snv)
        if where:
            by_location = stats['snvs']['locations']
            by_location[where] = by_location.setdefault(where, 0) + 1

        stats['snvs']['n_snvs'] += 1

    # the sequence is counted only when it had some snv
    if has_snvs:
        stats['snvs']['n_seqs'] += 1