Beispiel #1
0
    def test_Test2(self):
        """Check returnBkPtBoolSat on a single two-contig alignment record.

        One record naming 'Contig1' and 'Contig2' is passed in, and the
        returned dictionary is expected to hold exactly one interval per
        contig.
        """
        # NOTE(review): the first six fields are presumably start/end
        # coordinates and match lengths, followed by a percentage and two
        # further numeric fields -- confirm the layout against
        # breakPointFinding before relying on it.
        alignmentRecord = [
            1001, 2001, 2001, 1001, 1000, 1000, 100.0, 8000, 8000, 'Contig1',
            'Contig2'
        ]
        expectedOutput = {
            'Contig1': [[2001, 2016]],
            'Contig2': [[1001, 1016]],
        }

        repeatIntervalDic = breakPointFinding.returnBkPtBoolSat(
            [alignmentRecord])

        assert repeatIntervalDic == expectedOutput
Beispiel #2
0
def repeatFinder(folderName, inputName):
    '''
    Filter the self-alignment records of inputName and hand the survivors to
    breakPointFinding.returnBkPtBoolSat.

    Input : folderName - folder passed to alignerRobot.extractMumData and
                         IORobot.obtainLength
            inputName  - base name; records are read from
                         "self" + inputName + "Out" and sequence lengths
                         from inputName + '.fasta'
    Output : repeatIntervalDic - the value returned by
             breakPointFinding.returnBkPtBoolSat for the filtered records

    Each record is indexed as: [0], [1] = start/end on the first sequence;
    [2], [3] = the two coordinates on the second sequence (in either
    order); [4] = match length on the first sequence; [-2], [-1] = the two
    sequence names.  Example record seen during development:
    [1, 17852, 1, 17842, 17852, 17842, 98.11, 'Segkk0', 'Segkk128']
    '''
    dataList = alignerRobot.extractMumData(folderName,
                                           "self" + inputName + "Out")
    lenDic = IORobot.obtainLength(folderName, inputName + '.fasta')

    matchThres = 8000       # keep only matches strictly longer than this
    nonMatchThres = 2       # minimum unmatched overhang at an end
    separationThres = 1000  # extra spacing required for same-sequence hits

    newDataList = []

    for eachitem in dataList:
        matchLen1 = eachitem[4]
        # Both acceptance rules require a long match, so reject short ones
        # before doing any further work.
        if matchLen1 <= matchThres:
            continue

        name1, name2 = eachitem[-2], eachitem[-1]
        start1, end1 = eachitem[0], eachitem[1]
        # The second pair of coordinates may be stored in reverse order.
        start2, end2 = min(eachitem[2:4]), max(eachitem[2:4])

        if name1 != name2:
            # Different sequences: keep the record when the match stops
            # short of both sequence ends, or starts after both beginnings.
            keep = (min(lenDic[name1] - end1, lenDic[name2] - end2) >
                    nonMatchThres or min(start1, start2) > nonMatchThres)
        else:
            # Same sequence: keep only copies whose starts are further
            # apart than the match length plus the separation threshold.
            keep = abs(start1 - start2) > matchLen1 + separationThres

        if keep:
            newDataList.append(eachitem)

    # Order the records by the first sequence name before passing them on.
    # NOTE(review): the sort was needed by the removed groupby-based method;
    # it is kept in case returnBkPtBoolSat depends on the order -- confirm.
    newDataList.sort(key=itemgetter(-2))

    return breakPointFinding.returnBkPtBoolSat(newDataList)