forked from kakitone/finishingTool
/
nonRedundantResolver.py
81 lines (69 loc) · 2.98 KB
/
nonRedundantResolver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import alignerRobot
import IORobot
import houseKeeper
# Step 0: preprocess by removing embedded contigs (input: contigs.fasta; output: noEmbed.fasta)
def removeEmbedded(folderName , mummerLink):
    """Filter out the contigs flagged for removal by a self-alignment.

    Steps, in order:
      1. Strip every '|' character from contigs.fasta (sed writes the cleaned
         copy to contigs2.fasta, which is then copied back over contigs.fasta).
      2. Unless folderName + "selfOut" already exists, call
         alignerRobot.useMummerAlignBatch with contigs.fasta as both the
         reference and the query.
      3. Get the names to discard from alignerRobot.extractMumDataAndRemove
         and pass the remaining names to IORobot.putListToFileO with the
         output name "noEmbed".

    The number of contig names before and after filtering is printed.

    Args:
        folderName: working-directory prefix. It is concatenated directly
            with file names, so it must already end with a path separator.
        mummerLink: forwarded unchanged to alignerRobot.useMummerAlignBatch.

    Returns:
        None. The results are the files written under folderName.
    """
    print("removeEmbedded")
    thres = 10  # threshold forwarded to extractMumDataAndRemove

    # NOTE(review): folderName is spliced into these shell commands unquoted,
    # so a path containing spaces or shell metacharacters will break them.
    # The exit status of both commands is also ignored.
    os.system("sed -e 's/|//g' " + folderName + "contigs.fasta > " + folderName + "contigs2.fasta")
    os.system("cp " + folderName + "contigs2.fasta " + folderName + "contigs.fasta")

    # An existing selfOut file is reused, so the alignment is skipped on reruns.
    if not os.path.isfile(folderName + "selfOut"):
        # Batch entry layout: outputName, referenceName, queryName, specialName
        alignerRobot.useMummerAlignBatch(mummerLink, folderName, [["self", "contigs.fasta", "contigs.fasta", ""]], houseKeeper.globalParallel )

    lenDic = IORobot.obtainLength(folderName, 'contigs.fasta')
    removeList = alignerRobot.extractMumDataAndRemove(folderName,"selfOut",lenDic,thres)

    nameList = list(lenDic)
    print(len(nameList))

    # Filter in one pass against a set instead of calling list.remove() once
    # per removed name; the original iteration order of lenDic is preserved.
    # Assumes the names are hashable (they are compared against lenDic keys).
    removeSet = set(removeList)
    nameList = [eachitem for eachitem in nameList if eachitem not in removeSet]
    print(len(nameList))

    IORobot.putListToFileO(folderName, "contigs.fasta", "noEmbed", nameList)
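# NOTE: the commented-out block below is an earlier version of removeEmbedded, kept for reference only (it is not executed).
#def removeEmbedded(folderName , mummerLink):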
# print "removeEmbedded"
# thres = 10
# os.system("sed -e 's/|//g' " + folderName + "contigs.fasta > " + folderName + "contigs2.fasta")
# os.system("cp " + folderName + "contigs2.fasta " + folderName + "contigs.fasta")
# if True:
# alignerRobot.useMummerAlignBatch(mummerLink, folderName, [["self", "contigs.fasta", "contigs.fasta", ""]], houseKeeper.globalParallel )
# # alignerRobot.useMummerAlign(mummerLink, folderName, "self", "contigs.fasta", "contigs.fasta")
# # outputName, referenceName, queryName, specialName
#
# dataList = alignerRobot.extractMumData(folderName, "selfOut")
#
# dataList = alignerRobot.transformCoor(dataList)
#
# lenDic = IORobot.obtainLength(folderName, 'contigs.fasta')
#
# removeList = []
# for eachitem in dataList:
# match1, match2, name1, name2 = eachitem[4], eachitem[5], eachitem[7], eachitem[8]
#
# if name1 != name2:
# l1, l2 = lenDic[name1], lenDic[name2]
#
# if abs(l1 - match1) < thres and abs(l2 - match2) > thres:
# removeList.append(name1)
# elif abs(l1 - match1) > thres and abs(l2 - match2) < thres:
# removeList.append(name2)
# elif abs(l1 - match1) < thres and abs(l2 - match2) < thres:
# print "Both shortembedd", eachitem
#
#
#
# nameList = []
# for eachitem in lenDic:
# nameList.append(eachitem)
#
# print len(nameList)
#
# for eachitem in removeList:
# if eachitem in nameList:
# nameList.remove(eachitem)
# print len(nameList)
#
# IORobot.putListToFileO(folderName, "contigs.fasta", "noEmbed", nameList)