def isNonContiguous(lst, isSorted=True):
    '''
    Return True iff Beds are not overlapping or contiguous.

    lst is a list of objects exposing chrom, chromStart and chromEnd.
    If isSorted is not True, the list is first sorted by calling
    sortByChromStartEnd(lst), which modifies the caller's list.

    Each element is compared with the one before it:
      * if the pair is out of order (by chrom, then chromStart, then
        chromEnd), errAbort("Bed list is not sorted.") is called;
      * if both are on the same chrom and the later element starts at or
        before the end of the earlier one, the earlier element's
        coordinates are printed to stdout and False is returned.

    An empty or single-element list is trivially non-contiguous.
    '''
    if not isSorted:
        sortByChromStartEnd(lst)

    # Walk adjacent pairs lazily; no index arithmetic and no list copy.
    elements = iter(lst)
    prev = next(elements, None)
    for curr in elements:
        if curr.chrom < prev.chrom:
            errAbort("Bed list is not sorted.")
        elif curr.chrom == prev.chrom:
            # Lexicographic (start, end) comparison detects an unsorted pair.
            if (curr.chromStart, curr.chromEnd) < (prev.chromStart,
                                                   prev.chromEnd):
                errAbort("Bed list is not sorted.")
            # "<=" so that directly adjacent (contiguous) elements are
            # rejected as well as overlapping ones.
            if curr.chromStart <= prev.chromEnd:
                # Single pre-formatted string: prints the same text under
                # both the Python 2 print statement and Python 3 print().
                print("%s %s %s" % (prev.chrom, prev.chromStart,
                                    prev.chromEnd))
                return False
        prev = curr
    return True
def codonValue(dna):
    '''
    Look up the codon value of a 3-bp dna string.

    The lookup is case-insensitive (the string is upper-cased first).
    Strings that are not keys of codonTable yield NON_CODON (presumably
    'X', per the original documentation; the constant is defined
    elsewhere in the file).

    errAbort is called when dna is not exactly 3 characters long.
    '''
    if len(dna) != 3:
        message = "codonValue takes a 3-bp dna string as argument: %s" % dna
        errAbort(message)
    codon = dna.upper()
    return codonTable.get(codon, NON_CODON)
def getDNA(chrom, start, end, fn, noMask=False):
    '''
    Return the DNA associated with the BED-style position as a single
    long string.

    Runs the external program
        twoBitToFa <fn> stdout [-noMask] -seq=<chrom>:<start>-<end>
    and returns its output with the first (header) line dropped and all
    whitespace removed.

    chrom      -- sequence name passed in the -seq= argument
    start, end -- integer coordinates (formatted with %d)
    fn         -- path of the file handed to twoBitToFa; it is passed
                  verbatim as one argument, so no shell expansion or
                  word splitting is applied to it
    noMask     -- when true, add the -noMask flag

    errAbort is called if the program cannot be started or if its output
    has fewer than two lines (a header line plus at least one DNA line).
    '''
    # Build an argument vector instead of a shell string so that file or
    # sequence names containing spaces or shell metacharacters cannot
    # break (or inject into) the command line.
    cmd = ["twoBitToFa", fn, "stdout"]
    if noMask:
        cmd.append("-noMask")
    cmd.append("-seq=%s:%d-%d" % (chrom, start, end))

    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        output = proc.communicate()[0]
    except OSError as err:
        # Without a shell, a missing executable raises here instead of
        # yielding empty output; report it through the usual channel.
        errAbort("Unable to run twoBitToFa: %s" % err)
        output = ""

    # Pipes yield bytes on Python 3; str() on bytes would give "b'...'".
    if not isinstance(output, str):
        output = output.decode()

    lines = output.strip().split('\n')
    if len(lines) < 2:
        errAbort("Must be at least a header line and one line of DNA.")

    # Drop the header line, then squeeze out any remaining whitespace.
    sequence = "".join(lines[1:])
    return "".join(sequence.split())
def getOverlappingRegionDict(bd1, bd2, debug=True):
    '''
    Return a dictionary of overlapping regions within two dictionaries
    of Bed regions.

    bd1, bd2 -- dictionaries mapping a chrom name to a list of elements
                exposing chrom, chromStart and chromEnd
    debug    -- when true, both lists of every shared chrom are checked
                with isNonContiguous and errAbort is called if either
                check fails

    The result maps a chrom to the list of Bed.Bed objects built from
    "chrom<TAB>start<TAB>end" for every non-empty intersection, in the
    order they are found. Chroms without any overlap are not present in
    the result.

    The two lists are walked in step, always advancing past the element
    that ends first (both when they end together); this relies on each
    list being sorted and free of internal overlaps.
    '''
    retval = {}
    for chrom, bedList1 in bd1.items():
        bedList2 = bd2.get(chrom, [])
        if not bedList2:
            continue

        if debug:
            for bedList in (bedList1, bedList2):
                if not isNonContiguous(bedList):
                    errAbort(
                        "Calculating overlapping regions must have non-contiguous input elements."
                    )

        b1Len = len(bedList1)
        b2Len = len(bedList2)
        b1Idx = 0
        b2Idx = 0
        while (b1Idx < b1Len) and (b2Idx < b2Len):
            b1Curr = bedList1[b1Idx]
            b2Curr = bedList2[b2Idx]
            assert b1Curr.chrom == b2Curr.chrom

            # The intersection is non-empty only if it starts before it ends.
            maxStart = max(b1Curr.chromStart, b2Curr.chromStart)
            minEnd = min(b1Curr.chromEnd, b2Curr.chromEnd)
            if maxStart < minEnd:
                overlap = Bed.Bed("%s\t%d\t%d" %
                                  (b1Curr.chrom, maxStart, minEnd))
                # Append in place; rebuilding the list with "+" on every
                # hit made the accumulation quadratic.
                retval.setdefault(b1Curr.chrom, []).append(overlap)

            # Advance whichever element ends first; both on a tie.
            if b1Curr.chromEnd < b2Curr.chromEnd:
                b1Idx += 1
            elif b1Curr.chromEnd > b2Curr.chromEnd:
                b2Idx += 1
            else:
                b1Idx += 1
                b2Idx += 1
    return retval
def exonMutationType(wtDNA, mutantDNA):
    '''
    Classify the mutation that turns wtDNA into mutantDNA. Assumes both
    wt and mutant are fully within exons.

    Lengths are compared after removing '-' characters:
      * mutant longer  -> FS_INS, or INFRAME_INS if the difference is a
                          multiple of 3
      * mutant shorter -> FS_DEL, or INFRAME_DEL if the difference is a
                          multiple of 3
    For equal lengths both strings are looked up with codonValue
    (errAbort is called if either yields NON_CODON) and the result is:
      * wt is a stop codon:     SILENT_MUT if the mutant is also a stop
                                codon, otherwise GAIN_MUT
      * wt is not a stop codon: NONSENSE_MUT if the mutant is a stop
                                codon, SILENT_MUT if both codon values
                                are equal, otherwise MISSENSE_MUT
    '''
    wtBases = len(wtDNA.replace('-', ''))
    mutantBases = len(mutantDNA.replace('-', ''))

    # Length changes: insertions and deletions.
    if wtBases < mutantBases:
        inFrame = (mutantBases - wtBases) % 3 == 0
        return INFRAME_INS if inFrame else FS_INS
    if wtBases > mutantBases:
        inFrame = (wtBases - mutantBases) % 3 == 0
        return INFRAME_DEL if inFrame else FS_DEL

    # Same length: compare the two codons.
    wtValue = codonValue(wtDNA)
    mutantValue = codonValue(mutantDNA)
    if wtValue == NON_CODON or mutantValue == NON_CODON:
        errAbort(
            "Invalid DNA codons tested for mutation type. Wt: %s, mutant: %s"
            % (wtDNA, mutantDNA))

    wtIsStop = isStopCodon(wtDNA)
    mutantIsStop = isStopCodon(mutantDNA)
    if wtIsStop:
        return SILENT_MUT if mutantIsStop else GAIN_MUT
    if mutantIsStop:
        return NONSENSE_MUT
    return SILENT_MUT if wtValue == mutantValue else MISSENSE_MUT