def cigar2variants(cigar, sequence, tag):
    """From cigar to Variants in GFF format.

    The cigar is expanded with ``expand_cigar`` and scanned three times:
    once from the start and once from the end to measure the run of
    "I"/"D" operations at each extremity, and once in full to collect the
    positions where the expanded cigar carries a nucleotide letter.

    Args:
        cigar: compact cigar accepted by ``expand_cigar``.
        sequence: indexable sequence; ``sequence[pos]`` is stored alongside
            every nucleotide letter found in the expanded cigar.
        tag: indexable value; only its last element is inspected, to pick
            the label used for a positive 3' offset.

    Returns:
        The non-empty labels (5' offset, 3' offset, and the result of
        ``_define_snp``) joined by commas, or ``"NA"`` when all are empty.
    """
    # Expand once: the same expansion feeds the log line and all three scans.
    expanded = expand_cigar(cigar)
    logger.debug("\nISOMIRSEA:: expanded: %s", expanded)

    # Leading run: each "I" lowers and each "D" raises the 5' offset; the
    # first operation of any other kind ends the run.
    iso5p = 0
    for op in expanded:
        if op == "I":
            iso5p -= 1
        elif op == "D":
            iso5p += 1
        else:
            break

    # Trailing run, read from the end: each "I" raises and each "D" lowers
    # the 3' offset.
    iso3p = 0
    for op in reversed(expanded):
        if op == "I":
            iso3p += 1
        elif op == "D":
            iso3p -= 1
        else:
            break

    # Nucleotide letters in the expanded cigar are recorded together with
    # the base found in ``sequence`` at the same position. "D" operations
    # do not advance the position in ``sequence``; everything else does.
    isosnp = []
    pos = 0
    for op in expanded:
        if op in ("A", "T", "C", "G"):
            isosnp.append([pos, sequence[pos], op])
        if op != "D":
            pos += 1

    iso5p_label = "iso_5p:%s" % _fix(iso5p) if iso5p else ""
    if tag[-1] == "T" or iso3p < 0:
        iso3p_label = "iso_3p:%s" % _fix(iso3p) if iso3p else ""
    else:
        # Positive 3' offset whose tag does not end in "T".
        iso3p_label = "iso_add3p:%s" % iso3p if iso3p else ""

    labels = [
        "%s" % label
        for label in (iso5p_label, iso3p_label, _define_snp(isosnp))
        if label
    ]
    return ",".join(labels) if labels else "NA"
def cigar2variants(cigar, sequence, tag):
    """From cigar to Variants in GFF format.

    The cigar is expanded with ``expand_cigar``; the runs of "I"/"D"
    operations at the start and at the end of the expansion give the 5'
    and 3' offsets, and every nucleotide letter inside the expansion is
    collected as a candidate SNP for ``_define_snp``.

    Args:
        cigar: compact cigar accepted by ``expand_cigar``.
        sequence: indexable sequence; ``sequence[pos]`` is stored alongside
            every nucleotide letter found in the expanded cigar.
        tag: indexable value; only its last element is inspected, to pick
            the label used for a positive 3' offset.

    Returns:
        The non-empty labels (5' offset, 3' offset, and the result of
        ``_define_snp``) joined by commas, or ``"NA"`` when all are empty.
    """
    # Expand once: the same expansion feeds the log line and all three scans.
    expanded = expand_cigar(cigar)
    logger.debug("\nISOMIRSEA:: expanded: %s", expanded)

    # Leading run: each "I" raises and each "D" lowers the 5' offset; the
    # first operation of any other kind ends the run.
    iso5p = 0
    for op in expanded:
        if op == "I":
            iso5p += 1
        elif op == "D":
            iso5p -= 1
        else:
            break

    # Trailing run, read from the end, with the same sign convention.
    iso3p = 0
    for op in reversed(expanded):
        if op == "I":
            iso3p += 1
        elif op == "D":
            iso3p -= 1
        else:
            break

    # Nucleotide letters in the expanded cigar are recorded together with
    # the base found in ``sequence`` at the same position. "D" operations
    # do not advance the position in ``sequence``; everything else does.
    isosnp = []
    pos = 0
    for op in expanded:
        if op in ("A", "T", "C", "G"):
            isosnp.append([pos, sequence[pos], op])
        if op != "D":
            pos += 1

    iso5p_label = "iso_5p:%s" % _fix(iso5p) if iso5p else ""
    if tag[-1] == "T" or iso3p < 0:
        iso3p_label = "iso_3p:%s" % _fix(iso3p) if iso3p else ""
    else:
        # Positive 3' offset whose tag does not end in "T".
        iso3p_label = "iso_add:%s" % _fix(iso3p) if iso3p else ""

    labels = [
        "%s" % label
        for label in (iso5p_label, iso3p_label, _define_snp(isosnp))
        if label
    ]
    return ",".join(labels) if labels else "NA"
def test_cigar(self):
    """Exercise the cigar helpers exposed by mirtop.mirna.realign.

    Covers, in order: ``cigar_correction``, ``make_cigar``,
    ``expand_cigar`` and ``cigar2snp``. Each check raises ``ValueError``
    with a descriptive message when the helper's result differs from the
    expected value.
    """
    from mirtop.mirna.realign import (
        cigar2snp,
        cigar_correction,
        expand_cigar,
        make_cigar,
    )

    # cigar_correction: returns the pair of sequences after correction.
    operations = [[0, 14], [1, 1], [0, 5]]
    corrected = cigar_correction(operations,
                                 "AAAAGCTGGGTTGAGGAGGA",
                                 "AAAAGCTGGGTTGAGAGGA")
    if corrected[0] != "AAAAGCTGGGTTGAGGAGGA":
        raise ValueError("Sequence 1 is not right.")
    if corrected[1] != "AAAAGCTGGGTTGA-GAGGA":
        raise ValueError("Sequence 2 is not right.")

    # make_cigar: builds the compact cigar from two gapped sequences.
    built = make_cigar("AAA-AAATAAA", "AGACAAA-AAA")
    if built != "MAMD3MI3M":
        raise ValueError("Cigar not eq to MAMD3MI3M: %s" % built)

    # expand_cigar: run lengths are unrolled into repeated operations.
    expanded = expand_cigar("3MA3M")
    if expanded != "MMMAMMM":
        raise ValueError("Cigar 3MA3M not eqaul to MMMAMMM but to %s"
                         % expanded)

    # cigar2snp: the first reported entry is [position, letter, base].
    snps = cigar2snp("3MA3M", "AAATCCC")
    if snps[0] != [3, "A", "T"]:
        raise ValueError("3MA3M not equal AAATCCC but %s" % snps)