def generateorientedmatforonespecies(repository,speciesname,endofthefile):
	"""Build (or load from cache) the strand-aware TE adjacency matrix of a species.

	The matrix is 2L x 2L where L = len(TElist): index k stands for TElist[k]
	and index k+L for the same element when column 6 of the annotation line
	starts with "C". Consecutive annotation lines of a chromosome file
	add one to mat[previous, current] when the gap between them is below
	10000; when the two elements overlap, mat[current, previous] is
	incremented as well.

	Args:
		repository: path prefix (used by plain string concatenation) of the
			input and cache files.
		speciesname: key of utils.dictchr and prefix of the cache files.
		endofthefile: suffix appended to each chromosome name to obtain the
			annotation file name.

	Returns:
		(mat, TElist, TEdict, TEfamilydict).

	Raises:
		ValueError: when an annotation line names a TE absent from TElist.
	"""
	Matpath=repository+speciesname+".TElearnmatrix.mat"
	TEpath=repository+speciesname+".TElist"
	DEpath=repository+speciesname+".TEdict"
	DEfamilypath=repository+speciesname+".TEfamilydict"
	# NOTE(review): Matpath is the same file name that
	# generatebasicmatforonespecies uses for its L x L matrix - confirm the
	# two caches are never mixed up.
	# TE names and family mapping: reuse the cached files when present
	if op.exists(TEpath):
		TElist=utils.loadfilelist(TEpath) # list of each TE in the matrix
		TEfamilydict=utils.loadstrfiledict(DEfamilypath)
	else:
		TElist,TEfamilydict=utils.generatelistofTEforaspecies(repository,speciesname,endofthefile)
	# cached matrix: nothing to compute
	if op.exists(Matpath):
		# loadmat returns a dict; np.asarray keeps the rest of the code working
		mat=np.asarray(scpio.loadmat(Matpath)['mat'])
		TEdict=utils.loadfiledict(DEpath)
		return mat,TElist,TEdict,TEfamilydict
	L=len(TElist)
	mat=np.zeros((2*L,2*L))
	chrlist=utils.dictchr[speciesname] # restricts the loop to the wanted chromosomes
	print("==========>generation de la matrice d apprentisage")
	for chrname in chrlist:
		filename=repository+chrname+endofthefile
		print("=====>nom du fichier de chr: ",filename)
		with open(filename,"r") as filein:
			filein.readline() # first line is a header
			previous=None # matrix index of the previous element, None before the first one
			coordEndTEold=0.0
			for line in filein:
				ls=line.split()
				if not ls: # tolerate blank lines
					continue
				current=TElist.index(ls[1])
				if ls[6][0]=="C": # orientation decision
					current+=L
				coordBeginTE=float(ls[4])
				coordEndTE=float(ls[5])
				# segmentation limit: only elements closer than 10000 are linked
				if previous is not None and (coordBeginTE-coordEndTEold)<10000:
					mat[previous,current]+=1
					if coordBeginTE<coordEndTEold: # inclusion of one TE in another one
						mat[current,previous]+=1
				previous=current
				coordEndTEold=coordEndTE
	# NOTE(review): generatebasicmatforonespecies calls this helper with five
	# arguments and unpacks two return values - confirm this four-argument
	# call and single return value match the helper's signature.
	mat=filteramatrixfromrichrepeatelement(mat,TElist,True,L) # filter rich elements
	toto={'mat':mat} # savemat expects a dict; 'mat' is the key read back above
	scpio.savemat(Matpath,toto)
	# No count dictionary is built here; reuse a saved one when available so
	# the return statement always has a value.
	# NOTE(review): confirm an empty dict is an acceptable fallback for callers.
	if op.exists(DEpath):
		TEdict=utils.loadfiledict(DEpath)
	else:
		TEdict={}
	return mat,TElist,TEdict,TEfamilydict
def generateTEmatrixforaspecies(repository,speciesname,endofthefile,chrsizepath,resolution):
	"""Build the TE x genomic-bin presence matrix of a species and save it.

	Each row is an entry of TElist and each column a bin of size
	`resolution`. For every annotation line, the bins from
	int(begin/resolution + chromosome offset) over ceil((end-begin)/resolution)
	columns are set to 1. The result is written with scipy.io.savemat under
	the key 'mat' as a CSR sparse matrix; nothing is returned.

	Args:
		repository: path prefix (plain string concatenation) of input/output files.
		speciesname: key of utils.dictchr and prefix of the cache/output files.
		endofthefile: suffix appended to each chromosome name to obtain the
			annotation file name.
		chrsizepath: path given to utils.loadchrsizedict.
		resolution: bin size; cast with int() because it may come from argv.

	Raises:
		ValueError: when an annotation line names a TE absent from TElist.
		IndexError: when a computed bin lies outside the matrix.
	"""
	resolution=int(resolution) # cast of argv
	TEpath=repository+speciesname+".TElist"
	Matpath=repository+speciesname+".TEmatrix"
	DEfamilypath=repository+speciesname+".TEfamilydict"
	# TEfamilydict is not used below; the calls are kept because the
	# generation helper may create the cache files - TODO confirm
	if op.exists(TEpath):
		TElist=utils.loadfilelist(TEpath)
		TEfamilydict=utils.loadstrfiledict(DEfamilypath)
	else:
		TElist,TEfamilydict=utils.generatelistofTEforaspecies(repository,speciesname,endofthefile)
	sizedict=utils.loadchrsizedict(chrsizepath,resolution)
	matcolsize=sizedict["HicUsedTotalSize"] # size of the matrix : ["HicUsedTotalSize"] or ["HicChrBeginchrX"] #TODO here
	mat=np.zeros((len(TElist),matcolsize))
	chrlist=utils.dictchr[speciesname] # restricts the loop to the wanted chromosomes
	print(matcolsize)
	print(chrlist)
	for chrname in chrlist:
		filename=repository+chrname+endofthefile
		with open(filename,"r") as filein:
			filein.readline() # first line is a header
			chrbegin=None # bin offset of the chromosome, taken from the first data line
			for line in filein:
				ls=line.split()
				if not ls: # tolerate blank lines
					continue
				if chrbegin is None:
					# builtin float: the np.float alias no longer exists in NumPy
					chrbegin=float(sizedict["HicChrBegin"+ls[0]])
				begin=(float(ls[4])/resolution)+chrbegin
				end=(float(ls[5])/resolution)+chrbegin
				# NumPy rejects float indices, so truncate explicitly
				firstbin=int(begin)
				nbins=int(np.ceil(end-begin))
				# integer-array indexing (rather than a slice) keeps the
				# IndexError when a bin falls outside the matrix
				mat[TElist.index(ls[1]),np.arange(firstbin,firstbin+nbins)]=1
	toto={} # savemat expects a dict
	toto['mat']=sparse.csr_matrix(mat)
	scpio.savemat(Matpath,toto)
def makeTEPieChart(repository, speciesname, endofthefile, chrsizepath):
    """Write a table of the cumulated annotated length per TE family.

    For every chromosome file, the length (column 5 minus column 4) of each
    annotation line is added to the entry of its family in the dictionary
    returned by reservefamilydict. An extra "Autre" entry holds the total
    chromosome size minus the summed TE length. The table is written to
    repository + speciesname + "TEproportion" as tab-separated text.

    Args:
        repository: path prefix (plain string concatenation) of input/output files.
        speciesname: key of utils.dictchr and prefix of the cache/output files.
        endofthefile: suffix appended to each chromosome name to obtain the
            annotation file name.
        chrsizepath: path given to utils.loadchrsizedict (resolution 1).
    """
    outname = repository + speciesname + "TEproportion"
    sizedict = utils.loadchrsizedict(chrsizepath, 1)  # no binning here => resolution=1
    # reuse the cached TE list / family mapping when available
    TEpath = repository + speciesname + ".TElist"
    DEfamilypath = repository + speciesname + ".TEfamilydict"
    if op.exists(TEpath):
        TElist = utils.loadfilelist(TEpath)
        TEfamilydict = utils.loadstrfiledict(DEfamilypath)
    else:
        TElist, TEfamilydict = utils.generatelistofTEforaspecies(repository, speciesname, endofthefile)
    ProportionDict, _reversefamilydict = reservefamilydict(TEfamilydict)
    sumchrtot = 0
    sumTE = 0  # needed to compute the non-TE remainder
    # chromosomes here are expected to be the same as in chrsizepath
    for z in utils.dictchr[speciesname]:
        filename = repository + z + endofthefile
        print("=====>nom du fichier de chr: ", filename)
        with open(filename, "r") as filein:
            filein.readline()  # first line is a header
            for line in filein:
                ls = line.split()
                if not ls:  # tolerate blank lines
                    continue
                val = float(ls[5]) - float(ls[4])
                if val < 0:
                    print("Danger")
                ProportionDict[TEfamilydict[ls[1]]] += val
                sumTE += val
        sumchrtot += sizedict[z]
    # lengths are summed line by line, so overlapping annotations are
    # counted once per line
    ProportionDict["Autre"] = sumchrtot - sumTE
    # save the dict
    with open(outname, "w") as fout:
        print("nom du repertoire de sortie", outname)
        fout.write("TEtype\tQuantity\n")
        for tetype, quantity in ProportionDict.items():
            fout.write(tetype + "\t" + str(quantity) + "\n")
def generatebasicmatforonespecies(repository,speciesname,endofthefile):
	"""Build (or load from cache) the L x L TE adjacency matrix of a species.

	When the cached matrix file exists it is loaded together with the saved
	TE count dictionary and no other file is touched. Otherwise every
	chromosome file is scanned: for two consecutive annotation lines whose
	gap is below 5000, mat[previous, current] is incremented (by 2, plus 1
	on mat[current, previous], when the elements overlap, in which case the
	two lines are also written to the "insertionTElist" file). The matrix is
	then passed to filteramatrixfromrichrepeatelement, and both the count
	dictionary and the matrix are saved.

	Args:
		repository: path prefix (plain string concatenation) of input/output files.
		speciesname: key of utils.dictchr and prefix of the cache/output files.
		endofthefile: suffix appended to each chromosome name to obtain the
			annotation file name.

	Returns:
		(mat, TElist, TEdict, TEfamilydict).

	Raises:
		ValueError: when an annotation line names a TE absent from TElist.
	"""
	TEpath=repository+speciesname+".TElist"
	DEpath=repository+speciesname+".TEdict"
	DEfamilypath=repository+speciesname+".TEfamilydict"
	Matpath=repository+speciesname+".TElearnmatrix.mat"
	# TE names and family mapping: reuse the cached files when present
	if op.exists(TEpath):
		TElist=utils.loadfilelist(TEpath)
		TEfamilydict=utils.loadstrfiledict(DEfamilypath)
	else:
		TElist,TEfamilydict=utils.generatelistofTEforaspecies(repository,speciesname,endofthefile)
	# cached matrix: return early, leaving the insertion list file untouched
	if op.exists(Matpath):
		mat=scpio.loadmat(Matpath)['mat'] # loadmat returns a dict
		TEdict=utils.loadfiledict(DEpath)
		return mat,TElist,TEdict,TEfamilydict
	L=len(TElist)
	mat=np.zeros((L,L))
	TEdict={"sum":0}
	chrlist=utils.dictchr[speciesname] # restricts the loop to the wanted chromosomes
	print("==========>generation de la matrice d apprentisage")
	TEf=asum=0 # accumulators for the vocabulary complexity
	# the insertion list is only opened when the matrix is really generated
	with open(repository+speciesname+"insertionTElist","w") as insertionouput:
		for z in chrlist:
			filename=repository+z+endofthefile
			print("=====>nom du fichier de chr: ",filename)
			with open(filename,"r") as filein:
				filein.readline() # first line is a header
				previous=None # TElist index of the previous element, None before the first one
				lold=None # raw text of the previous line
				coordEndTEold=0.0
				for l in filein:
					ls=l.split()
					if not ls: # tolerate blank lines
						continue
					TEf,asum=updateTEcomplexity(ls[3],TEf,asum) # this is a general factor
					current=TElist.index(ls[1])
					coordBeginTE=float(ls[4])
					coordEndTE=float(ls[5])
					if previous is None:
						# first element of the chromosome: counted once, no edge
						updateTEdict(TEdict,TElist[current],1)
					elif (coordBeginTE-coordEndTEold)<5000: # segmentation limit
						if coordBeginTE<coordEndTEold: # inclusion of one TE in another one
							mat[previous,current]+=2
							mat[current,previous]+=1
							updateTEdict(TEdict,TElist[previous],2) # slower but clearer
							updateTEdict(TEdict,TElist[current],1) # slower but clearer
							insertionouput.write(lold.strip("\n")+"\t"+l) # not so useful
						else:
							mat[previous,current]+=1
							updateTEdict(TEdict,TElist[current],1)
					# we move linearly along the chromosome: current becomes previous
					previous=current
					coordEndTEold=coordEndTE
					lold=l
	# reduce the number of elements in the matrix (filter *rich elements)
	# CARE: after that the relation "N nodes, N-1 edges" is not conserved
	mat,TEdict=filteramatrixfromrichrepeatelement(mat,TElist,TEdict,False,L)
	# save TEdict, one line per TE of the list plus the "sum" line
	with open(DEpath,"w") as dout:
		for name in TElist:
			if name not in TEdict:
				TEdict[name]=0 # not present in the learning graph, present in the dataset
			dout.write(name+"\t"+str(TEdict[name])+"\n")
		dout.write("sum\t"+str(TEdict["sum"])+"\n")
	# save the matrix
	toto={'mat':mat} # savemat expects a dict
	scpio.savemat(Matpath,toto)
	# asum is still 0 when no annotation line was read; avoid dividing by zero
	complexity=TEf/asum if asum else float("nan")
	print("====>Complexite du vocabulaire de l'espece: ",str(complexity)," <=======") # TE diversity
	print("====> somme apres generation de la matrice",np.sum(mat))
	return mat,TElist,TEdict,TEfamilydict