Esempio n. 1
0
def makegcmat(fastafile,sizef,step,achr,savename):
	"""Save, for each offset k, the mean pairwise GC average along diagonal k.

	The GC values for ``achr`` are looked up in the mapping returned by
	``gccontentforonechr(fastafile, sizef, step)``. A square matrix is built
	whose entry (a, b) is the average of values a and b; then, for every
	offset k from 0 to len-1, the mean of the k-th upper diagonal is taken.
	The resulting list is handed to ``utils.savealist`` with the target name
	``savename + "gcfd"``.

	Args:
		fastafile, sizef, step: forwarded unchanged to ``gccontentforonechr``.
		achr: key selecting one entry of the returned mapping
			(presumably a chromosome name -- confirm against the helper).
		savename: prefix of the output name; ``"gcfd"`` is appended.

	Returns:
		None. The result is only written out through ``utils.savealist``.
	"""
	gc_by_chr = gccontentforonechr(fastafile, sizef, step)
	# Entries are passed through float() one by one, so numeric strings work.
	gc_values = np.array([float(v) for v in gc_by_chr[achr]], dtype=float)
	n_windows = len(gc_values)
	# Broadcasting a column against a row yields every pairwise average at
	# once; the matrix is symmetric because addition is commutative.
	pair_mean = (gc_values[:, None] + gc_values[None, :]) / 2
	diag_means = [
		np.mean(np.diag(pair_mean, k=offset)) for offset in range(n_windows)
	]
	utils.savealist(diag_means, savename + "gcfd")
def generatedoublelistofset(filename,windowsize):
	"""Split the entries of a whitespace-delimited file into consecutive windows.

	The first line of ``filename`` is skipped as a header. Every following
	non-blank line contributes its second field (index 1) to the current
	window; once a window holds ``windowsize`` entries it is closed and a new
	one is started. A trailing window with fewer than ``windowsize`` entries
	is not emitted.

	Results are cached: if ``filename + "ListedList.dump"`` already exists,
	both lists are unpickled from disk and returned without reading
	``filename``. Otherwise the lists are computed and written to:

	* ``filename + "ListedList.dump"``      -- pickled list of windows
	* ``filename + "ListedSizeList.dump"``  -- pickled list of window sizes
	* ``filename + "DistanceofTElist.txt"`` -- span per window, via ``utils.savealist``
	* ``filename + "SetList.txt"``          -- windows as text, via ``utils.savelistofset``

	Args:
		filename: path of the input file; also the prefix of all output files.
		windowsize: number of entries per window. Must be at least 1,
			otherwise the first data line fails because no window start
			has been recorded yet.

	Returns:
		``(setlist, sizelist)``: the list of windows (each a list of the
		field-1 strings, in file order, duplicates kept) and the list of
		their sizes.
	"""
	namedump=filename+"ListedList.dump" # remember: filename includes the chr
	namedumpsizelist=filename+"ListedSizeList.dump"
	namedistancehist=filename+"DistanceofTElist.txt"
	if op.exists(namedump):
		print("===> Set deja existants")
		# NOTE(security): pickle.load can execute arbitrary code; these cache
		# files must come from a trusted location.
		with open(namedump,"rb") as dumpfile:
			setlist=pickle.load(dumpfile)
		with open(namedumpsizelist,"rb") as dumpfile:
			sizelist=pickle.load(dumpfile)
		return setlist,sizelist

	setlist=[]
	sizelist=[]
	distlist=[]
	window=[] # entries of the window currently being filled
	with open(filename,"r") as filein:
		next(filein,None) # skip the header line
		for line in filein:
			fields=line.split()
			if not fields:
				# Blank line: nothing to record (previously an IndexError).
				continue
			te=fields[1] # 1 name 3 family
			if len(window)>=windowsize: # most of the case
				# NOTE(review): the end coordinate is taken from the current
				# line, i.e. the first entry of the *next* window, not the
				# last entry of the window being closed -- confirm intent.
				dend=float(fields[5])
				setlist.append(window)
				# Record the size before starting a new window; the original
				# reset its counter first and therefore always stored 0.
				sizelist.append(len(window))
				distlist.append(dend-dinit)
				window=[] # back to no one
			if not window:
				# First entry of a window: remember where the window starts.
				dinit=float(fields[4])
			window.append(te)
	print("====>Dumping de la liste des sets")
	with open(namedump,"wb") as dumpfile:
		pickle.dump(setlist,dumpfile)
	with open(namedumpsizelist,"wb") as dumpfile:
		pickle.dump(sizelist,dumpfile)
	utils.savealist(distlist,namedistancehist)
	print("====> Sauvegarge de la liste des sets en texte")
	utils.savelistofset(setlist,filename+"SetList.txt")
	return setlist,sizelist