def makeDNAannotation(variant, transcript, reference):
    """Build the DNA-level annotation of a variant relative to a transcript.

    Returns a 2-tuple ``(annotation, alternative)``:

    * substitution  -> ``('REF>ALT', '')``
    * insertion     -> ``('ins' + seq, '')``, or, when the inserted bases
      repeat the adjacent reference bases inside the transcript,
      ``('dup' + seq_or_length, 'ins' + seq)``
    * deletion      -> ``('del' + seq_or_length, '')``
    * complex indel -> ``('delins' + ALT, '')``

    Sequences longer than 4 bases are replaced by their length in the
    ``dup``/``del`` forms. For transcripts whose ``strand`` is not 1 the
    reported bases are passed through ``reverseComplement()``.

    ``reference`` is only consulted for insertions (via ``getReference``).
    If the variant matches none of the four categories the function
    returns ``None``.
    """
    # Base substitution
    if variant.isSubstitution():
        if transcript.strand != 1:
            return variant.ref.reverseComplement() + '>' + variant.alt.reverseComplement(), ''
        return variant.ref + '>' + variant.alt, ''

    # Insertion (possibly reported as a duplication)
    if variant.isInsertion():
        inserted = core.Sequence(variant.alt)

        if transcript.strand == 1:
            # Reference bases immediately preceding the insertion point
            flank = reference.getReference(
                variant.chrom, variant.pos - len(inserted), variant.pos - 1)
            # Duplication: the insert repeats the preceding bases and that
            # stretch does not start before the transcript start
            if inserted == flank and (variant.pos - len(inserted) >= transcript.transcriptStart):
                label = str(len(inserted)) if len(inserted) > 4 else inserted
                return 'dup' + label, 'ins' + inserted
            return 'ins' + inserted, ''

        # Reference bases starting at the insertion point
        flank = reference.getReference(
            variant.chrom, variant.pos, variant.pos + len(inserted) - 1)
        # Duplication: the insert repeats the following bases and that
        # stretch does not run past the transcript end
        if inserted == flank and (variant.pos + len(inserted) - 1 <= transcript.transcriptEnd):
            label = str(len(inserted)) if len(inserted) > 4 else inserted.reverseComplement()
            return 'dup' + label, 'ins' + inserted.reverseComplement()
        return 'ins' + inserted.reverseComplement(), ''

    # Deletion
    if variant.isDeletion():
        # Long deletions are reported by length only, whatever the strand
        if len(variant.ref) > 4:
            return 'del' + str(len(variant.ref)), ''
        if transcript.strand == 1:
            return 'del' + variant.ref, ''
        return 'del' + core.Sequence(variant.ref).reverseComplement(), ''

    # Complex indel
    if variant.isComplex():
        replacement = variant.alt if transcript.strand == 1 else variant.alt.reverseComplement()
        return 'delins' + replacement, ''

    # None of the known variant categories matched
    return None
def getReference(self, chrom, start, end):
    """Return the reference sequence of ``chrom`` between ``start`` and ``end``.

    ``start`` and ``end`` are 1-based, inclusive positions; they are
    converted to the ``(start - 1, end)`` form expected by
    ``self.fastafile.fetch``.

    The chromosome name is looked up in ``self.fastafile.references`` as
    given, then with a ``'chr'`` prefix, and finally ``'MT'`` is tried as
    ``'chrM'``.

    Returns:
        ``None`` if no matching chromosome name exists in the FASTA file;
        an empty ``core.Sequence`` if the requested window is empty (either
        as given or after clipping it to the chromosome); otherwise the
        upper-cased bases as a ``core.Sequence``.
    """
    # Checking if chromosome name exists
    known = self.fastafile.references
    goodchrom = chrom
    if goodchrom not in known:
        goodchrom = 'chr' + chrom
        if goodchrom not in known:
            # Only the mitochondrial name has a further alias to try
            if chrom != 'MT':
                return None
            goodchrom = 'chrM'
            if goodchrom not in known:
                return None

    # Fetching data from reference genome
    if end < start:
        return core.Sequence('')
    if start < 1:
        start = 1

    # Older pysam releases expose the length lookup under a camelCase name
    if pysam.__version__ in ['0.7.7', '0.7.8', '0.8.0']:
        last = self.fastafile.getReferenceLength(goodchrom)
    else:
        last = self.fastafile.get_reference_length(goodchrom)
    if end > last:
        end = last

    # Clipping can invert the window when the request lies entirely outside
    # the chromosome (end < 1 or start > last). Treat that like any other
    # empty request instead of handing fetch() a start beyond its end.
    # NOTE(review): pysam is expected to reject such coordinates - confirm
    # against the FastaFile.fetch documentation for the versions in use.
    if end < start:
        return core.Sequence('')

    seq = self.fastafile.fetch(goodchrom, start - 1, end)
    return core.Sequence(seq.upper())