def makegcmat(fastafile, sizef, step, achr, savename):
    """Save the per-diagonal mean of the pairwise-average matrix of one chromosome.

    The per-window values (presumably GC content -- confirm against
    ``gccontentforonechr``) are looked up for ``achr`` in the mapping
    returned by ``gccontentforonechr(fastafile, sizef, step)``.  A square
    matrix is built whose entry ``(i, j)`` is the average of values ``i``
    and ``j``; the mean of every diagonal ``k = 0 .. L-1`` of that matrix is
    then written with ``utils.savealist`` to ``savename + "gcfd"``.

    Args:
        fastafile: forwarded unchanged to ``gccontentforonechr``.
        sizef: forwarded unchanged to ``gccontentforonechr``.
        step: forwarded unchanged to ``gccontentforonechr``.
        achr: key selecting one entry of the returned mapping.
        savename: prefix of the output name; ``"gcfd"`` is appended.

    Returns:
        None.  The result is only written through ``utils.savealist``.
    """
    per_chr_values = gccontentforonechr(fastafile, sizef, step)
    values = np.array([float(v) for v in per_chr_values[achr]], dtype=float)
    n_bins = len(values)

    # Entry (i, j) is the average of values i and j; broadcasting a column
    # against a row fills the whole symmetric matrix in one expression.
    pair_mean = (values[:, None] + values[None, :]) / 2

    # One mean per diagonal offset.  Adding to the integer 0 mirrors
    # accumulating into a zero-initialised list.
    diag_means = [
        0 + np.mean(np.diag(pair_mean, k=offset)) for offset in range(n_bins)
    ]
    utils.savealist(diag_means, savename + "gcfd")
def generatedoublelistofset(filename, windowsize):
    """Group the names listed in ``filename`` into consecutive windows.

    Results are cached next to the input: when ``filename +
    "ListedList.dump"`` exists, both pickled lists are loaded and returned
    without reading ``filename`` (``windowsize`` is ignored in that case).

    Otherwise the file is parsed.  Its first line is skipped.  Every later
    line is split on whitespace; field 1 is the element name, and fields 4
    and 5 are parsed as floats (presumably start and end coordinates --
    confirm against the input format).  Each time ``windowsize`` names have
    accumulated, the window is closed when the NEXT line is read: the span
    stored for it is field 5 of that next line minus field 4 of the
    window's first line.  A trailing window that is never followed by
    another line is therefore not emitted.

    Side effects on a cache miss: both lists are pickled, the spans are
    written with ``utils.savealist`` to ``filename +
    "DistanceofTElist.txt"`` and the windows with ``utils.savelistofset``
    to ``filename + "SetList.txt"``.

    Args:
        filename: path of the whitespace-delimited input file; also the
            prefix of every output file.
        windowsize: number of names per window; must be positive when the
            file has to be parsed.

    Returns:
        ``(setlist, sizelist)``: the list of windows (each a list of names)
        and the list of their lengths.

    Raises:
        ValueError: if the file must be parsed and ``windowsize`` <= 0.
    """
    namedump = filename + "ListedList.dump"  # remember: filename includes the chr
    namedumpsizelist = filename + "ListedSizeList.dump"
    namedistancehist = filename + "DistanceofTElist.txt"

    if op.exists(namedump):
        print("===> Set deja existants")
        # NOTE(security): unpickling can run arbitrary code; these cache
        # files must only ever come from a previous run of this function.
        with open(namedump, "rb") as dumpfile:
            setlist = pickle.load(dumpfile)
        with open(namedumpsizelist, "rb") as dumpfile:
            sizelist = pickle.load(dumpfile)
        return setlist, sizelist

    if windowsize <= 0:
        # A non-positive size would close a window before any line had set
        # its start coordinate.
        raise ValueError("windowsize must be positive, got %r" % (windowsize,))

    setlist = []
    sizelist = []
    distlist = []
    window = []  # names collected for the window currently being filled
    with open(filename, "r") as filein:
        next(filein, None)  # skip the first line
        for line in filein:
            fields = line.split()
            te = fields[1]  # 1 name 3 family
            if len(window) >= windowsize:  # most of the cases
                dend = float(fields[5])
                setlist.append(window)
                # Record the real window length (previously the counter was
                # reset before being stored, so every entry was 0).
                sizelist.append(len(window))
                distlist.append(dend - dinit)
                window = []  # start a fresh window
            if not window:
                # First line of a window fixes its start coordinate.
                dinit = float(fields[4])
            window.append(te)

    print("====>Dumping de la liste des sets")
    with open(namedump, "wb") as dumpfile:
        pickle.dump(setlist, dumpfile)
    with open(namedumpsizelist, "wb") as dumpfile:
        pickle.dump(sizelist, dumpfile)
    utils.savealist(distlist, namedistancehist)
    print("====> Sauvegarge de la liste des sets en texte")
    utils.savelistofset(setlist, filename + "SetList.txt")
    return setlist, sizelist