def arrangeSeqBasedOnRef(motherGenome, reconstructedGenome, parameterRobot):
    """Rotate reconstructedGenome so that its start lines up with motherGenome.

    Steps:
      1. Build a table with one row per length-``fingerPrint`` window of
         ``motherGenome`` (window contents followed by the window offset)
         and sort it with ``itemgetterkk``.
      2. Sample ``searchNumber`` evenly spaced windows of
         ``reconstructedGenome``, look each one up in the sorted table via
         ``bridgeResolve.bisectkk``, and keep the samples that matched exactly
         one mother offset.  Each such sample votes for a shift; the median
         vote is used.
      3. Rotate ``reconstructedGenome`` by the chosen shift.

    Args:
        motherGenome: indexable integer sequence used as the reference.
        reconstructedGenome: indexable integer sequence to be rotated.
        parameterRobot: object whose ``G`` attribute is used as the reference
            length.

    Returns:
        A new array allocated with ``np.zeros(len(reconstructedGenome))``
        (NumPy's default dtype) holding the rotated sequence.

    Raises:
        ValueError: if no sampled window matched exactly one offset, so no
            shift can be estimated.
    """
    # NOTE(review): this name is defined a second time further down the file;
    # at import time the later definition replaces this one.

    # 1) Extract components and sort
    G = parameterRobot.G
    fingerPrint = 20
    searchNumber = 1000
    numWindows = G - fingerPrint + 1
    reconLength = len(reconstructedGenome)

    # Row i = motherGenome[i:i + fingerPrint] followed by the offset i.
    allFingerPrint = np.zeros((numWindows, fingerPrint + 1), dtype=np.int32)
    for i in range(numWindows):
        allFingerPrint[i][0:-1] = motherGenome[i:i + fingerPrint]
        allFingerPrint[i][-1] = i
    allFingerPrint = sorted(allFingerPrint,
                            key=itemgetterkk(range(0, fingerPrint + 1)))

    # 2) Search for appropriate hash items
    searchItemList = []
    oneItemList = []
    hashSearch = np.zeros(fingerPrint + 1, dtype=np.int32)
    for i in range(searchNumber):
        # Evenly spaced sample position, clamped so the slice below is full.
        foundIndex = int(min(i * reconLength / float(searchNumber),
                             reconLength - fingerPrint - 1))
        hashSearch[0:fingerPrint + 1] = reconstructedGenome[
            foundIndex:foundIndex + fingerPrint + 1]
        # The last slot is the offset column of the table; it is overwritten
        # with a value below every real offset before the lookup.
        hashSearch[-1] = -10

        # NOTE(review): which neighbour of the insertion point holds the
        # matches depends on bridgeResolve.bisectkk's comparison rules, which
        # are not visible here -- confirm the "- 1" against that helper.
        indexInFPList = bridgeResolve.bisectkk(allFingerPrint, hashSearch) - 1

        # Walk backwards collecting the offsets of every row whose window
        # contents equal the query window.
        tmpItem = []
        while (0 <= indexInFPList < len(allFingerPrint)
               and np.array_equal(allFingerPrint[indexInFPList][0:-1],
                                  hashSearch[0:-1])):
            tmpItem.append(allFingerPrint[indexInFPList][-1])
            indexInFPList = indexInFPList - 1

        # Single-argument print(...) gives the same output under the Python 2
        # print statement and the Python 3 print function.
        print("%s %s" % ("tmpItem", tmpItem))
        searchItemList.append(tmpItem)
        if len(tmpItem) == 1:
            oneItemList.append(i)

    starterIndexList = []
    print("%s %s" % ("oneItemList", oneItemList))
    print("%s %s" % ("searchItemList", searchItemList))
    for dummyIndex in oneItemList:
        # Shift implied by this sample: matched mother offset minus the
        # nominal sample position, wrapped into the non-negative range.
        tmp = int(searchItemList[dummyIndex][0]
                  - dummyIndex * reconLength / float(searchNumber))
        if tmp < 0:
            tmp = tmp + G
        starterIndexList.append(tmp)
    print("%s %s" % ("starterIndexList", starterIndexList))

    if not starterIndexList:
        # np.median([]) is NaN and int(NaN) fails with an opaque ValueError;
        # raise the same exception type with an actionable message instead.
        raise ValueError(
            "arrangeSeqBasedOnRef: no sampled window of reconstructedGenome "
            "matched exactly one position in motherGenome; cannot estimate "
            "the start offset")

    starterIndex = G - int(np.median(starterIndexList))
    print(starterIndex)

    # 3) Arrange the starter of reconstructed guy
    newArranged = np.zeros(reconLength)
    newArranged[0:reconLength - starterIndex] = reconstructedGenome[
        starterIndex:reconLength]
    newArranged[reconLength - starterIndex:reconLength] = reconstructedGenome[
        0:starterIndex]
    return newArranged
def arrangeSeqBasedOnRef(motherGenome, reconstructedGenome, parameterRobot):
    """Rotate reconstructedGenome so that its start lines up with motherGenome.

    Steps:
      1. Build a table with one row per length-``fingerPrint`` window of
         ``motherGenome`` (window contents followed by the window offset)
         and sort it with ``itemgetterkk``.
      2. Sample ``searchNumber`` evenly spaced windows of
         ``reconstructedGenome``, look each one up in the sorted table via
         ``bridgeResolve.bisectkk``, and keep the samples that matched exactly
         one mother offset.  Each such sample votes for a shift; the median
         vote is used.
      3. Rotate ``reconstructedGenome`` by the chosen shift.

    Args:
        motherGenome: indexable integer sequence used as the reference.
        reconstructedGenome: indexable integer sequence to be rotated.
        parameterRobot: object whose ``G`` attribute is used as the reference
            length.

    Returns:
        A new array allocated with ``np.zeros(len(reconstructedGenome))``
        (NumPy's default dtype) holding the rotated sequence.

    Raises:
        ValueError: if no sampled window matched exactly one offset, so no
            shift can be estimated.
    """
    # NOTE(review): an earlier definition with the same name exists in this
    # file; being the later one, this definition is the one left bound.

    # 1) Extract components and sort
    G = parameterRobot.G
    fingerPrint = 20
    searchNumber = 1000
    numWindows = G - fingerPrint + 1
    reconLength = len(reconstructedGenome)

    # Row i = motherGenome[i:i + fingerPrint] followed by the offset i.
    allFingerPrint = np.zeros((numWindows, fingerPrint + 1), dtype=np.int32)
    for i in range(numWindows):
        allFingerPrint[i][0:-1] = motherGenome[i:i + fingerPrint]
        allFingerPrint[i][-1] = i
    allFingerPrint = sorted(allFingerPrint,
                            key=itemgetterkk(range(0, fingerPrint + 1)))

    # 2) Search for appropriate hash items
    searchItemList = []
    oneItemList = []
    hashSearch = np.zeros(fingerPrint + 1, dtype=np.int32)
    for i in range(searchNumber):
        # Evenly spaced sample position, clamped so the slice below is full.
        foundIndex = int(min(i * reconLength / float(searchNumber),
                             reconLength - fingerPrint - 1))
        hashSearch[0:fingerPrint + 1] = reconstructedGenome[
            foundIndex:foundIndex + fingerPrint + 1]
        # The last slot is the offset column of the table; it is overwritten
        # with a value below every real offset before the lookup.
        hashSearch[-1] = -10

        # NOTE(review): which neighbour of the insertion point holds the
        # matches depends on bridgeResolve.bisectkk's comparison rules, which
        # are not visible here -- confirm the "- 1" against that helper.
        indexInFPList = bridgeResolve.bisectkk(allFingerPrint, hashSearch) - 1

        # Walk backwards collecting the offsets of every row whose window
        # contents equal the query window.
        tmpItem = []
        while (0 <= indexInFPList < len(allFingerPrint)
               and np.array_equal(allFingerPrint[indexInFPList][0:-1],
                                  hashSearch[0:-1])):
            tmpItem.append(allFingerPrint[indexInFPList][-1])
            indexInFPList = indexInFPList - 1

        # Single-argument print(...) gives the same output under the Python 2
        # print statement and the Python 3 print function.
        print("%s %s" % ("tmpItem", tmpItem))
        searchItemList.append(tmpItem)
        if len(tmpItem) == 1:
            oneItemList.append(i)

    starterIndexList = []
    print("%s %s" % ("oneItemList", oneItemList))
    print("%s %s" % ("searchItemList", searchItemList))
    for dummyIndex in oneItemList:
        # Shift implied by this sample: matched mother offset minus the
        # nominal sample position, wrapped into the non-negative range.
        tmp = int(searchItemList[dummyIndex][0]
                  - dummyIndex * reconLength / float(searchNumber))
        if tmp < 0:
            tmp = tmp + G
        starterIndexList.append(tmp)
    print("%s %s" % ("starterIndexList", starterIndexList))

    if not starterIndexList:
        # np.median([]) is NaN and int(NaN) fails with an opaque ValueError;
        # raise the same exception type with an actionable message instead.
        raise ValueError(
            "arrangeSeqBasedOnRef: no sampled window of reconstructedGenome "
            "matched exactly one position in motherGenome; cannot estimate "
            "the start offset")

    starterIndex = G - int(np.median(starterIndexList))
    print(starterIndex)

    # 3) Arrange the starter of reconstructed guy
    newArranged = np.zeros(reconLength)
    newArranged[0:reconLength - starterIndex] = reconstructedGenome[
        starterIndex:reconLength]
    newArranged[reconLength - starterIndex:reconLength] = reconstructedGenome[
        0:starterIndex]
    return newArranged
def searchFromRO(readSortedf2, searchitem):
    """Return field 2 of the entry just before searchitem's bisect position.

    Args:
        readSortedf2: indexable collection handed to
            ``bridgeResolve.bisectkk``; each entry must support ``[2]``.
            Presumably sorted in the order that helper expects -- verify
            against the caller.
        searchitem: the key passed to ``bridgeResolve.bisectkk``.

    Returns:
        ``readSortedf2[pos - 1][2]`` where ``pos`` is the value returned by
        ``bridgeResolve.bisectkk(readSortedf2, searchitem)``.
    """
    insertionPoint = bridgeResolve.bisectkk(readSortedf2, searchitem)
    # NOTE(review): if the helper ever returns 0, the index below is -1 and
    # Python reads the last entry instead of failing -- confirm that is
    # acceptable for the callers.
    precedingEntry = readSortedf2[insertionPoint - 1]
    return precedingEntry[2]