def cleanBarcodeTail(read1, read2, readStart1, readStart2):
    """Trim a mutually matching tail from both reads of a pair.

    For decreasing comparison lengths, the tail of each read's sequence is
    compared (via ``util.editDistance``) with the end of the reverse
    complement of the *other* read's start string. At the first length
    where both comparisons are within the allowed distance, that many
    trailing items are removed from index 1 and index 3 of both reads.

    Args:
        read1: Mutable, index-assignable read record. Index 1 is used as
            the sequence; index 3 is trimmed by the same amount
            (presumably the quality string -- confirm against the caller).
        read2: Same layout as ``read1``, for the paired read.
        readStart1: Start string associated with read 1.
        readStart2: Start string associated with read 2.

    Returns:
        The number of trailing bases removed from each read, or 0 when no
        overlap was found (the reads are then left untouched).
    """
    # Guard clause: with an empty start string there is nothing to compare,
    # so no overlap can ever be detected.
    if not readStart1 or not readStart2:
        return 0

    reverse1 = util.reverseComplement(readStart1)
    reverse2 = util.reverseComplement(readStart2)

    barcodeStringLen = min(len(readStart1), len(readStart2))
    r1len = len(read1[1])
    r2len = len(read2[1])

    # ``range`` (not ``xrange``) keeps this runnable on Python 2 and 3.
    for i in range(barcodeStringLen):
        compLen = barcodeStringLen - i
        # Never compare (or trim) a span that would cover a whole read.
        if compLen >= r1len or compLen >= r2len:
            continue

        distance1 = util.editDistance(read1[1][-compLen:], reverse2[i:])
        distance2 = util.editDistance(read2[1][-compLen:], reverse1[i:])

        # If the tail of one end matches the start of the other end, we
        # suspect the template is not shorter than the read length, so
        # both ends are trimmed.
        # Allow roughly one edit per five compared bases; ``//`` makes the
        # integer division explicit instead of relying on Python 2 ``/``.
        threshold = compLen // 5
        if distance1 <= threshold and distance2 <= threshold:
            read1[1] = read1[1][:-compLen]
            read1[3] = read1[3][:-compLen]
            read2[1] = read2[1][:-compLen]
            read2[3] = read2[3][:-compLen]
            return compLen

    return 0
def cleanBarcodeTail(read1, read2, readStart1, readStart2):
    """Detect and trim a matching tail shared by both reads of a pair.

    Starting from the longest usable length and working down, the last
    ``compLen`` items of each read's sequence are compared (using
    ``util.editDistance``) with the end of the reverse complement of the
    opposite read's start string. The first length at which both
    comparisons fall within the threshold is trimmed from index 1 and
    index 3 of both reads.

    Args:
        read1: Mutable, index-assignable read record. Index 1 is treated
            as the sequence; index 3 is cut by the same amount (presumably
            the quality string -- confirm against the caller).
        read2: Paired read record with the same layout as ``read1``.
        readStart1: Start string associated with read 1.
        readStart2: Start string associated with read 2.

    Returns:
        The trimmed length when an overlap was found, otherwise 0 (in
        which case neither read is modified).
    """
    # Empty start strings leave nothing to match against.
    if not readStart1 or not readStart2:
        return 0

    reverse1 = util.reverseComplement(readStart1)
    reverse2 = util.reverseComplement(readStart2)

    barcodeStringLen = min(len(readStart1), len(readStart2))
    seq1 = read1[1]
    seq2 = read2[1]

    # The compared span must be strictly shorter than either read, so the
    # longest candidate is bounded by both read lengths minus one.
    longest = min(barcodeStringLen, len(seq1) - 1, len(seq2) - 1)

    for compLen in range(longest, 0, -1):
        # Offset into the reverse complements that leaves their final
        # (barcodeStringLen - offset) positions onward for comparison.
        offset = barcodeStringLen - compLen
        distance1 = util.editDistance(seq1[-compLen:], reverse2[offset:])
        distance2 = util.editDistance(seq2[-compLen:], reverse1[offset:])

        # If the tail of one end matches the start of the other end, we
        # suspect the template is not shorter than the read length, so
        # both ends are trimmed.
        # Tolerance is one edit per five compared bases (floor division,
        # explicit so the result is an int on Python 2 and 3 alike).
        threshold = compLen // 5
        if distance1 <= threshold and distance2 <= threshold:
            read1[1] = seq1[:-compLen]
            read1[3] = read1[3][:-compLen]
            read2[1] = seq2[:-compLen]
            read2[3] = read2[3][:-compLen]
            return compLen

    return 0
def similarity(self, response1, response2):
    """Return a similarity score for two responses.

    Each response is joined with single spaces and the two strings are
    compared with ``util.editDistance``. The score is
    ``(total - distance) / total`` where ``total`` is the combined length
    of the two joined strings, floored at 0.

    Args:
        response1: Iterable of strings (joined with ``' '``).
        response2: Iterable of strings (joined with ``' '``).

    Returns:
        The score described above; 1.0 when both joined strings are empty.
    """
    response1Str = ' '.join(response1)
    response2Str = ' '.join(response2)

    # Normalise by the lengths of the strings that are actually compared.
    # Using the token counts here (as before) mixed units with the
    # character-level distance and drove most scores down to 0.
    total = float(len(response1Str) + len(response2Str))

    # Two empty responses are identical; avoid dividing by zero.
    if total == 0:
        return 1.0

    distance = util.editDistance(response1Str, response2Str)
    overlap = (total - distance) / total
    return max(0, overlap)
def compareWithIdeal(self, response, ideals):
    """Return the best similarity between a response and any ideal answer.

    For each ideal, similarity is ``1 - distance / N`` where ``distance``
    comes from ``util.editDistance(response, ideal)`` and ``N`` is the
    longer of the two lengths.

    Args:
        response: Sized sequence accepted by ``util.editDistance``.
        ideals: Iterable of sequences to compare ``response`` against.

    Returns:
        The highest similarity found, never below 0.0 (which is also the
        result when ``ideals`` is empty).
    """
    maxSim = 0.0
    for ideal in ideals:
        longest = max(len(response), len(ideal))
        if longest == 0:
            # Both sides are empty, hence identical; the formula below
            # would otherwise divide by zero.
            sim = 1.0
        else:
            distance = util.editDistance(response, ideal)
            sim = 1.0 - distance / float(longest)
        maxSim = max(maxSim, sim)
    return maxSim
def compareWithIdeal(self, response, ideals):
    """Return the best string similarity between a response and the ideals.

    The response and each ideal are joined with single spaces; similarity
    is ``1 - distance / N`` where ``distance`` comes from
    ``util.editDistance`` on the joined strings and ``N`` is the length of
    the longer joined string.

    Args:
        response: Iterable of strings (joined with ``' '``).
        ideals: Iterable of iterables of strings, one per ideal answer.

    Returns:
        The highest similarity found, never below 0.0 (which is also the
        result when ``ideals`` is empty).
    """
    maxSim = 0.0
    responseStr = ' '.join(response)
    for ideal in ideals:
        idealStr = ' '.join(ideal)
        longest = max(len(responseStr), len(idealStr))
        if longest == 0:
            # Both joined strings are empty, hence identical; the formula
            # below would otherwise divide by zero.
            sim = 1.0
        else:
            distance = util.editDistance(responseStr, idealStr)
            sim = 1.0 - distance / float(longest)
        maxSim = max(maxSim, sim)
    return maxSim
def similarity(self, response1, response2):
    """Return a similarity score for two responses.

    The score is ``(total - distance) / total`` where ``distance`` comes
    from ``util.editDistance(response1, response2)`` and ``total`` is the
    combined length of the two responses, floored at 0.

    Args:
        response1: Sized sequence accepted by ``util.editDistance``.
        response2: Sized sequence accepted by ``util.editDistance``.

    Returns:
        The score described above; 1.0 when both responses are empty.
    """
    total = float(len(response1) + len(response2))

    # Two empty responses are identical; avoid dividing by zero.
    if total == 0:
        return 1.0

    distance = util.editDistance(response1, response2)
    overlap = (total - distance) / total
    return max(0, overlap)