def arrangeSeqBasedOnRef(motherGenome, reconstructedGenome, parameterRobot):
    """Rotate reconstructedGenome so that its start lines up with motherGenome.

    Steps:
      1. Build a table holding every length-20 window of the first
         ``parameterRobot.G`` entries of ``motherGenome`` (the window's start
         position is appended as the last column) and sort it with
         ``itemgetterkk``.
      2. Probe the table through ``bridgeResolve.bisectkk`` with 1000 evenly
         spaced windows of ``reconstructedGenome``.  A probe that matches
         exactly one reference window votes for the offset between the
         reference position and the probed position.
      3. Take the median vote and circularly shift ``reconstructedGenome``
         by the corresponding amount.

    Args:
        motherGenome: reference sequence; sliced and stored into an
            ``np.int32`` table.
        reconstructedGenome: sequence to rotate; treated as circular.
        parameterRobot: object whose ``G`` attribute gives the number of
            reference positions to index (presumably the genome length --
            confirm against the caller).

    Returns:
        A new numpy array of ``len(reconstructedGenome)`` entries with the
        default ``np.zeros`` dtype (float64) holding the rotated sequence.

    Raises:
        ValueError: if ``G`` is smaller than the fingerprint length, if
            ``reconstructedGenome`` is too short to probe, or if no probe
            matched the reference exactly once.
    """
    # 1) Extract components and sort
    G = parameterRobot.G
    fingerPrint = 20
    searchNumber = 1000
    reconLength = len(reconstructedGenome)

    if G < fingerPrint:
        raise ValueError("reference length G=%s is shorter than the "
                         "fingerprint length %s" % (G, fingerPrint))
    if reconLength < fingerPrint + 1:
        # Probes read fingerPrint + 1 entries; a shorter input would yield
        # negative probe positions and silently mis-sliced windows.
        raise ValueError("reconstructedGenome has %s entries; at least %s "
                         "are required" % (reconLength, fingerPrint + 1))

    windowCount = G - fingerPrint + 1
    allFingerPrint = np.zeros((windowCount, fingerPrint + 1), dtype=np.int32)

    for i in range(windowCount):
        allFingerPrint[i][0:-1] = motherGenome[i:i + fingerPrint]
        allFingerPrint[i][-1] = i

    allFingerPrint = sorted(allFingerPrint,
                            key=itemgetterkk(range(0, fingerPrint + 1)))

    # 2) Search for appropriate hash items
    searchItemList = []     # per probe: reference positions whose window matches
    probePositionList = []  # per probe: position actually read in reconstructedGenome
    oneItemList = []        # probe numbers that matched exactly one reference window
    hashSearch = np.zeros(fingerPrint + 1, dtype=np.int32)

    for i in range(searchNumber):
        # Evenly spaced probe position, clamped so the window stays in range.
        foundIndex = int(min(i * reconLength / float(searchNumber),
                             reconLength - fingerPrint - 1))
        probePositionList.append(foundIndex)
        hashSearch[0:fingerPrint + 1] = reconstructedGenome[
            foundIndex:foundIndex + fingerPrint + 1]
        # The last slot lines up with the table's position column; it is
        # overwritten with -10, a value no real position (>= 0) takes.
        # NOTE(review): how this interacts with bisectkk's ordering is not
        # visible from this function.
        hashSearch[-1] = -10

        indexInFPList = bridgeResolve.bisectkk(allFingerPrint, hashSearch) - 1

        # Walk downwards over the run of rows whose window equals the probe.
        tmpItem = []
        while (0 <= indexInFPList < len(allFingerPrint) and np.array_equal(
                allFingerPrint[indexInFPList][0:-1], hashSearch[0:-1])):
            tmpItem.append(allFingerPrint[indexInFPList][-1])
            indexInFPList = indexInFPList - 1
        print("tmpItem %s" % (tmpItem,))
        searchItemList.append(tmpItem)
        if len(tmpItem) == 1:
            oneItemList.append(i)

    starterIndexList = []
    print("oneItemList %s" % (oneItemList,))
    print("searchItemList %s" % (searchItemList,))

    for dummyIndex in oneItemList:
        # Vote with the integer position that was actually probed.  Using the
        # un-truncated fractional position and then int() biases positive
        # offsets by one whenever the probe spacing is not an integer, and is
        # wrong for the clamped probes at the end of the sequence.
        tmp = int(searchItemList[dummyIndex][0]) - probePositionList[dummyIndex]
        # Offsets are circular: fold them into [0, G).
        starterIndexList.append(tmp % G)

    print("starterIndexList %s" % (starterIndexList,))
    if not starterIndexList:
        # np.median([]) is NaN and int(NaN) fails with an unhelpful message.
        raise ValueError("no fingerprint of reconstructedGenome matched "
                         "motherGenome exactly once; cannot determine the "
                         "rotation")
    # NOTE: the median is taken on the folded values, so votes straddling the
    # 0 / G boundary are not averaged circularly.
    starterIndex = int(np.median(starterIndexList))
    # Reduce modulo the sequence length so the slice bounds below stay valid
    # even when len(reconstructedGenome) differs from G (a shift by the full
    # length is the same as no shift).
    starterIndex = (G - starterIndex) % reconLength
    print(starterIndex)

    # 3) Arrange the starter of reconstructed guy
    newArranged = np.zeros(reconLength)
    tailLength = reconLength - starterIndex

    newArranged[0:tailLength] = reconstructedGenome[starterIndex:reconLength]
    newArranged[tailLength:reconLength] = reconstructedGenome[0:starterIndex]

    return newArranged
Exemple #2
0
def arrangeSeqBasedOnRef(motherGenome, reconstructedGenome, parameterRobot):
    """Rotate reconstructedGenome so that its start lines up with motherGenome.

    Steps:
      1. Build a table holding every length-20 window of the first
         ``parameterRobot.G`` entries of ``motherGenome`` (the window's start
         position is appended as the last column) and sort it with
         ``itemgetterkk``.
      2. Probe the table through ``bridgeResolve.bisectkk`` with 1000 evenly
         spaced windows of ``reconstructedGenome``.  A probe that matches
         exactly one reference window votes for the offset between the
         reference position and the probed position.
      3. Take the median vote and circularly shift ``reconstructedGenome``
         by the corresponding amount.

    Args:
        motherGenome: reference sequence; sliced and stored into an
            ``np.int32`` table.
        reconstructedGenome: sequence to rotate; treated as circular.
        parameterRobot: object whose ``G`` attribute gives the number of
            reference positions to index (presumably the genome length --
            confirm against the caller).

    Returns:
        A new numpy array of ``len(reconstructedGenome)`` entries with the
        default ``np.zeros`` dtype (float64) holding the rotated sequence.

    Raises:
        ValueError: if ``G`` is smaller than the fingerprint length, if
            ``reconstructedGenome`` is too short to probe, or if no probe
            matched the reference exactly once.
    """
    # 1) Extract components and sort
    G = parameterRobot.G
    fingerPrint = 20
    searchNumber = 1000
    reconLength = len(reconstructedGenome)

    if G < fingerPrint:
        raise ValueError("reference length G=%s is shorter than the "
                         "fingerprint length %s" % (G, fingerPrint))
    if reconLength < fingerPrint + 1:
        # Probes read fingerPrint + 1 entries; a shorter input would yield
        # negative probe positions and silently mis-sliced windows.
        raise ValueError("reconstructedGenome has %s entries; at least %s "
                         "are required" % (reconLength, fingerPrint + 1))

    windowCount = G - fingerPrint + 1
    allFingerPrint = np.zeros((windowCount, fingerPrint + 1), dtype=np.int32)

    for i in range(windowCount):
        allFingerPrint[i][0:-1] = motherGenome[i:i + fingerPrint]
        allFingerPrint[i][-1] = i

    allFingerPrint = sorted(allFingerPrint,
                            key=itemgetterkk(range(0, fingerPrint + 1)))

    # 2) Search for appropriate hash items
    searchItemList = []     # per probe: reference positions whose window matches
    probePositionList = []  # per probe: position actually read in reconstructedGenome
    oneItemList = []        # probe numbers that matched exactly one reference window
    hashSearch = np.zeros(fingerPrint + 1, dtype=np.int32)

    for i in range(searchNumber):
        # Evenly spaced probe position, clamped so the window stays in range.
        foundIndex = int(
            min(i * reconLength / float(searchNumber),
                reconLength - fingerPrint - 1))
        probePositionList.append(foundIndex)
        hashSearch[0:fingerPrint +
                   1] = reconstructedGenome[foundIndex:foundIndex +
                                            fingerPrint + 1]
        # The last slot lines up with the table's position column; it is
        # overwritten with -10, a value no real position (>= 0) takes.
        # NOTE(review): how this interacts with bisectkk's ordering is not
        # visible from this function.
        hashSearch[-1] = -10

        indexInFPList = bridgeResolve.bisectkk(allFingerPrint, hashSearch) - 1

        # Walk downwards over the run of rows whose window equals the probe.
        tmpItem = []
        while (0 <= indexInFPList < len(allFingerPrint) and np.array_equal(
                allFingerPrint[indexInFPList][0:-1], hashSearch[0:-1])):
            tmpItem.append(allFingerPrint[indexInFPList][-1])
            indexInFPList = indexInFPList - 1
        print("tmpItem %s" % (tmpItem,))
        searchItemList.append(tmpItem)
        if len(tmpItem) == 1:
            oneItemList.append(i)

    starterIndexList = []
    print("oneItemList %s" % (oneItemList,))
    print("searchItemList %s" % (searchItemList,))

    for dummyIndex in oneItemList:
        # Vote with the integer position that was actually probed.  Using the
        # un-truncated fractional position and then int() biases positive
        # offsets by one whenever the probe spacing is not an integer, and is
        # wrong for the clamped probes at the end of the sequence.
        tmp = (int(searchItemList[dummyIndex][0]) -
               probePositionList[dummyIndex])
        # Offsets are circular: fold them into [0, G).
        starterIndexList.append(tmp % G)

    print("starterIndexList %s" % (starterIndexList,))
    if not starterIndexList:
        # np.median([]) is NaN and int(NaN) fails with an unhelpful message.
        raise ValueError("no fingerprint of reconstructedGenome matched "
                         "motherGenome exactly once; cannot determine the "
                         "rotation")
    # NOTE: the median is taken on the folded values, so votes straddling the
    # 0 / G boundary are not averaged circularly.
    starterIndex = int(np.median(starterIndexList))
    # Reduce modulo the sequence length so the slice bounds below stay valid
    # even when len(reconstructedGenome) differs from G (a shift by the full
    # length is the same as no shift).
    starterIndex = (G - starterIndex) % reconLength
    print(starterIndex)

    # 3) Arrange the starter of reconstructed guy
    newArranged = np.zeros(reconLength)
    tailLength = reconLength - starterIndex

    newArranged[0:tailLength] = reconstructedGenome[starterIndex:reconLength]
    newArranged[tailLength:reconLength] = reconstructedGenome[0:starterIndex]

    return newArranged
Exemple #3
0
def searchFromRO(readSortedf2,searchitem):
    """Return field 2 of the entry just before searchitem's bisect position.

    ``bridgeResolve.bisectkk`` is asked where ``searchitem`` falls in
    ``readSortedf2``; the entry one slot before that index is fetched and its
    element at index 2 is returned.

    NOTE(review): when bisectkk returns 0 the lookup uses index -1, which for
    a list or array wraps around to the last entry -- confirm callers never
    hit that case.
    """
    insertionPoint = bridgeResolve.bisectkk(readSortedf2, searchitem)
    precedingEntry = readSortedf2[insertionPoint - 1]
    return precedingEntry[2]