def cleanBarcodeTail(read1, read2, readStart1, readStart2):
    """Trim a matching tail from both reads of a pair, in place.

    Candidate tail lengths are tried from the longest (the shorter of the two
    start strings) down to 1. For each length ``compLen`` the last ``compLen``
    characters of each read are compared, by ``util.editDistance``, with a
    suffix of the reverse complement of the *other* read's start string. On
    the first length where both distances are at most ``compLen // 5``,
    ``compLen`` trailing characters are removed from elements 1 and 3 of both
    reads.

    Args:
        read1: Mutable indexable record; elements 1 and 3 are sliced and
            reassigned (presumably the sequence and quality strings of a
            FASTQ record -- confirm against callers).
        read2: The mate record, same layout as ``read1``.
        readStart1: Start string associated with ``read1``.
        readStart2: Start string associated with ``read2``.

    Returns:
        The number of trailing characters removed from each read, or 0 when
        nothing was trimmed.
    """
    reverse1 = util.reverseComplement(readStart1)
    reverse2 = util.reverseComplement(readStart2)
    barcodeStringLen = min(len(readStart1), len(readStart2))
    r1len = len(read1[1])
    r2len = len(read2[1])
    # range() instead of xrange() so the function also runs on Python 3.
    for i in range(barcodeStringLen):
        compLen = barcodeStringLen - i
        # Never compare (or trim) a tail that would consume a whole read.
        if compLen >= r1len or compLen >= r2len:
            continue
        distance1 = util.editDistance(read1[1][-compLen:], reverse2[i:])
        distance2 = util.editDistance(read2[1][-compLen:], reverse1[i:])
        # Allow roughly one edit per five compared characters. Explicit floor
        # division keeps the Python 2 integer threshold on Python 3 as well.
        threshold = compLen // 5
        # If the tail of each end matches the (reverse-complemented) start of
        # the other end, the reads are suspected to have run into each
        # other's start region, so trim both ends.
        # NOTE(review): the original comment said the template is "not
        # shorter" than the read length -- confirm the intended wording.
        if distance1 <= threshold and distance2 <= threshold:
            read1[1] = read1[1][:-compLen]
            read1[3] = read1[3][:-compLen]
            read2[1] = read2[1][:-compLen]
            read2[3] = read2[3][:-compLen]
            return compLen
    return 0
# Example #2
# 0
def cleanBarcodeTail(read1, read2, readStart1, readStart2):
    """Remove a tail shared with the mate's start string from both reads.

    Works from the longest candidate tail length (the shorter of the two
    start strings) down to 1. At each length the tail of one read is compared
    with a suffix of the reverse complement of the other read's start string
    using ``util.editDistance``. The first length at which both comparisons
    are within ``compLen // 5`` edits is trimmed from elements 1 and 3 of
    both reads, which are modified in place.

    Args:
        read1: Mutable indexable record whose elements 1 and 3 are trimmed
            (presumably sequence and quality strings -- confirm with callers).
        read2: The paired record, same layout as ``read1``.
        readStart1: Start string associated with ``read1``.
        readStart2: Start string associated with ``read2``.

    Returns:
        The trimmed length, or 0 if no candidate length matched.
    """
    reverse1 = util.reverseComplement(readStart1)
    reverse2 = util.reverseComplement(readStart2)
    barcodeStringLen = min(len(readStart1), len(readStart2))
    r1len = len(read1[1])
    r2len = len(read2[1])
    # Iterate the candidate lengths directly; range() (rather than xrange())
    # keeps this runnable on both Python 2 and Python 3.
    for compLen in range(barcodeStringLen, 0, -1):
        # A tail as long as a whole read is never compared or trimmed.
        if compLen >= r1len or compLen >= r2len:
            continue
        offset = barcodeStringLen - compLen
        distance1 = util.editDistance(read1[1][-compLen:], reverse2[offset:])
        distance2 = util.editDistance(read2[1][-compLen:], reverse1[offset:])
        # Floor division gives the same integer threshold on Python 2 and 3.
        threshold = compLen // 5
        # Both tails match the reverse-complemented start of the opposite
        # end, so the overlap is trimmed from both reads.
        # NOTE(review): the original comment claimed this means the template
        # is "not shorter" than the read length -- confirm the wording.
        if distance1 <= threshold and distance2 <= threshold:
            read1[1] = read1[1][:-compLen]
            read1[3] = read1[3][:-compLen]
            read2[1] = read2[1][:-compLen]
            read2[3] = read2[3][:-compLen]
            return compLen
    return 0
	def similarity(self, response1, response2):
		"""Return a similarity score for two token sequences.

		Each response is joined with single spaces and the two resulting
		strings are compared with ``util.editDistance``. The distance is
		normalised by the combined length of the joined strings.

		Args:
			response1: Iterable of string tokens.
			response2: Iterable of string tokens.

		Returns:
			``(total - distance) / total`` clamped below at 0.0, where
			``total`` is the combined length of the joined strings. Returns
			1.0 when both joined strings are empty.
		"""
		response1Str = ' '.join(response1)
		response2Str = ' '.join(response2)
		# Normalise by the lengths of the strings that are actually compared;
		# using the token counts would mix units with the string distance.
		totalLen = float(len(response1Str) + len(response2Str))
		if totalLen == 0:
			# Two empty responses are identical; this also avoids a
			# division by zero.
			return 1.0
		overlap = (totalLen - util.editDistance(response1Str, response2Str)) / totalLen

		return max(0.0, overlap)
	def compareWithIdeal(self, response, ideals):
		"""Return the best similarity between ``response`` and any ideal.

		For each ideal the similarity is ``1 - distance / N``, where
		``distance`` comes from ``util.editDistance(response, ideal)`` and
		``N`` is the longer of the two lengths.

		Args:
			response: Sized sequence to score.
			ideals: Iterable of sized sequences to compare against.

		Returns:
			The largest similarity found, never below 0.0. Returns 0.0 when
			``ideals`` is empty.
		"""
		maxSim = 0.0
		for ideal in ideals:
			longest = max(len(response), len(ideal))
			if longest == 0:
				# Both sequences are empty, hence identical; avoids a
				# division by zero.
				sim = 1.0
			else:
				sim = 1.0 - util.editDistance(response, ideal) / float(longest)
			maxSim = max(maxSim, sim)
		return maxSim
	def compareWithIdeal(self, response, ideals):
		"""Return the best similarity between a response and any ideal answer.

		The response and each ideal are joined with single spaces, and the
		joined strings are compared with ``util.editDistance``. The similarity
		for one ideal is ``1 - distance / N``, where ``N`` is the length of
		the longer joined string.

		Args:
			response: Iterable of string tokens.
			ideals: Iterable of token iterables to compare against.

		Returns:
			The largest similarity found, never below 0.0. Returns 0.0 when
			``ideals`` is empty.
		"""
		maxSim = 0.0
		responseStr = ' '.join(response)
		for ideal in ideals:
			idealStr = ' '.join(ideal)
			longest = max(len(responseStr), len(idealStr))
			if longest == 0:
				# Both joined strings are empty, hence identical; avoids a
				# division by zero.
				sim = 1.0
			else:
				sim = 1.0 - util.editDistance(responseStr, idealStr) / float(longest)
			maxSim = max(maxSim, sim)
		return maxSim
	def similarity(self, response1, response2):
		"""Return a similarity score for two sequences.

		The score is ``(total - distance) / total``, where ``total`` is the
		combined length of both inputs and ``distance`` comes from
		``util.editDistance(response1, response2)``.

		Args:
			response1: Sized sequence.
			response2: Sized sequence.

		Returns:
			The score clamped below at 0.0. Returns 1.0 when both inputs are
			empty.
		"""
		totalLen = float(len(response1) + len(response2))
		if totalLen == 0:
			# Two empty inputs are identical; this also avoids a division
			# by zero.
			return 1.0
		overlap = (totalLen - util.editDistance(response1, response2)) / totalLen

		return max(0.0, overlap)