def generateHicListofpattern(filename,sizeList,resolution):
    """Return, for every bin, the list of repeat labels overlapping that bin.

    The result is cached with pickle in ``filename + "ListedList.dump"``.
    When that file exists it is loaded and returned and ``filename`` itself
    is not read.

    Otherwise ``filename`` is parsed as a whitespace-separated table whose
    first line is a header.  For each row (0-based columns):

    * column 1 is the element name and column 3 the family name;
    * columns 4 and 5 are the begin/end coordinates, divided by
      ``resolution`` to obtain (fractional) bin positions.

    The row's label is appended to bins ``int(begin + k)`` for
    ``k = 0 .. ceil(end - begin) - 1``.  The label is the family name,
    except for ``"Simple_repeat"`` rows where it is
    ``simplerepeattype(name)``.

    NOTE(review): ``generatedoublelistofset`` uses the same
    ``"ListedList.dump"`` cache name, so the two functions can read each
    other's cache for the same ``filename`` - confirm this is never hit.

    Args:
        filename: path of the annotation table (the chromosome is expected
            to be part of the name, per the original author's remark).
        sizeList: forwarded to ``generateemptylistoflist``; presumably the
            number of bins - TODO confirm against that helper.
        resolution: bin width, in the same unit as the coordinates.

    Returns:
        The list of per-bin label lists.
    """
    namedump = filename + "ListedList.dump"  # remember: filename includes the chr

    if op.exists(namedump):
        print("===> Set deja existants")
        # NOTE: pickle executes arbitrary code on load; only trusted,
        # locally generated cache files must be placed at this path.
        with open(namedump, "rb") as dumpfile:
            return pickle.load(dumpfile)

    setlist = generateemptylistoflist(sizeList)
    with open(filename, "r") as filein:
        filein.readline()  # skip the header line
        for line in filein:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines instead of raising IndexError
            # Builtin float replaces np.float (alias removed in NumPy 1.24).
            begin = float(fields[4]) / resolution
            end = float(fields[5]) / resolution
            nbins = int(np.ceil(end - begin))
            if nbins <= 0:
                continue
            if fields[3] != "Simple_repeat":
                label = fields[3]
            else:
                # Simple repeats get a substitute family name from their name.
                label = simplerepeattype(fields[1])
            for offset in range(nbins):
                # int() is only needed to index the list.
                setlist[int(begin + offset)].append(label)

    print("====>Dumping de la liste des sets")
    with open(namedump, "wb") as dumpfile:
        pickle.dump(setlist, dumpfile)
    print("====> Sauvegarge de la liste des sets en texte")
    utils.savelistofset(setlist, filename + "SetList.txt")
    return setlist
def generatejumplistofset(filename,windowsize):
    """Group element names into consecutive, non-overlapping windows of sets.

    Results are cached with pickle in ``filename + "SetList.dump"`` (the
    sets) and ``filename + "Sizelist.dump"`` (their sizes).  When the set
    cache exists both files are loaded and returned and ``filename`` is not
    read.

    Otherwise ``filename`` is parsed as a whitespace-separated table whose
    first line is a header; column 1 (0-based) is the element name.  Names
    starting with ``[A-Z]+-rich`` or ``[A-Z]+_rich`` are skipped.  The
    remaining names are accumulated into a set; once ``windowsize`` names
    have been consumed, the next kept name closes the window (the set and
    its length are recorded) and opens a new one.  The window still in
    progress at end of file is not recorded.

    NOTE: earlier versions wrote the set list into the size cache by
    mistake; delete stale ``*Sizelist.dump`` files to regenerate them.

    Args:
        filename: path of the annotation table.
        windowsize: number of kept rows per window.

    Returns:
        ``(setlist, sizelist)``: the list of name sets and the list of
        their sizes (number of distinct names per window).
    """
    namedump = filename + "SetList.dump"
    namedumpsizelist = filename + "Sizelist.dump"

    if op.exists(namedump):
        print("===> Set deja existants")
        # NOTE: pickle executes arbitrary code on load; only trusted,
        # locally generated cache files must be placed at these paths.
        with open(namedump, "rb") as dumpfile:
            setlist = pickle.load(dumpfile)
        with open(namedumpsizelist, "rb") as dumpfile:
            sizelist = pickle.load(dumpfile)
        return setlist, sizelist

    # Same filter as the two original patterns, compiled once.
    skipped_name = re.compile(r"[A-Z]+[-_]rich")
    setlist = []
    sizelist = []
    theset = set()
    count = 0  # kept rows consumed by the current window
    with open(filename, "r") as filein:
        filein.readline()  # skip the header line
        for line in filein:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines instead of raising IndexError
            te = fields[1]
            if skipped_name.match(te):
                continue
            if count >= windowsize:
                setlist.append(theset)
                sizelist.append(len(theset))
                theset = set()
                count = 0
            theset.add(te)
            count += 1

    print("====>Dumping de la liste des sets")
    with open(namedump, "wb") as dumpfile:
        pickle.dump(setlist, dumpfile)
    # The size cache must hold sizelist (it used to receive setlist).
    with open(namedumpsizelist, "wb") as dumpfile:
        pickle.dump(sizelist, dumpfile)
    print("====> Sauvegarge de la liste des sets en texte")
    utils.savelistofset(setlist, filename + "SetList.txt")
    return setlist, sizelist
def generatedoublelistofset(filename,windowsize):
    """Group element names into consecutive windows, keeping duplicates.

    Results are cached with pickle in ``filename + "ListedList.dump"`` (the
    windows) and ``filename + "ListedSizeList.dump"`` (their sizes).  When
    the window cache exists both files are loaded and returned and
    ``filename`` is not read.

    Otherwise ``filename`` is parsed as a whitespace-separated table whose
    first line is a header; column 1 (0-based) is the element name (column 3
    being the family, per the original author's remark) and columns 4 and 5
    are read as floats.  Names are appended to a list; once ``windowsize``
    rows have been consumed, the next row closes the window: the list, its
    length and a distance are recorded and a new window starts with that
    row.  The window still in progress at end of file is not recorded.  The
    distances are written with ``utils.savealist`` to
    ``filename + "DistanceofTElist.txt"`` and are not returned.

    ``windowsize`` is expected to be at least 1.

    NOTE(review): ``generateHicListofpattern`` uses the same
    ``"ListedList.dump"`` cache name, so the two functions can read each
    other's cache for the same ``filename`` - confirm this is never hit.

    Args:
        filename: path of the annotation table.
        windowsize: number of rows per window.

    Returns:
        ``(setlist, sizelist)``: the list of name lists and the list of
        their lengths.
    """
    namedump = filename + "ListedList.dump"  # remember: filename includes the chr
    namedumpsizelist = filename + "ListedSizeList.dump"
    namedistancehist = filename + "DistanceofTElist.txt"

    if op.exists(namedump):
        print("===> Set deja existants")
        # NOTE: pickle executes arbitrary code on load; only trusted,
        # locally generated cache files must be placed at these paths.
        with open(namedump, "rb") as dumpfile:
            setlist = pickle.load(dumpfile)
        with open(namedumpsizelist, "rb") as dumpfile:
            sizelist = pickle.load(dumpfile)
        return setlist, sizelist

    setlist = []
    sizelist = []
    distlist = []
    window = []
    count = 0  # rows consumed by the current window
    with open(filename, "r") as filein:
        filein.readline()  # skip the header line
        for line in filein:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines instead of raising IndexError
            if count >= windowsize:
                # NOTE(review): the end coordinate comes from the row that
                # opens the NEXT window, not the last row of this one -
                # kept as in the original; confirm this is intended.
                distlist.append(float(fields[5]) - dinit)
                setlist.append(window)
                # Record the real window length (the counter used to be
                # reset before being stored, so every size was 0).
                sizelist.append(len(window))
                window = []
                count = 0
            window.append(fields[1])
            if count == 0:
                # Column 4 of the first row of the window.
                dinit = float(fields[4])
            count += 1

    print("====>Dumping de la liste des sets")
    with open(namedump, "wb") as dumpfile:
        pickle.dump(setlist, dumpfile)
    with open(namedumpsizelist, "wb") as dumpfile:
        pickle.dump(sizelist, dumpfile)
    utils.savealist(distlist, namedistancehist)
    print("====> Sauvegarge de la liste des sets en texte")
    utils.savelistofset(setlist, filename + "SetList.txt")
    return setlist, sizelist