Ejemplo n.º 1
0
def startProcess(readFileList,AnnotationDir,ReadLen,readType,annotation_type):
    """Build the annotation tables, extract per-lane read data and model it.

    Every path used here is rooted at ``<AnnotationDir>/workfloder``.  The
    steps run strictly in this order:

    1. ``static_exonJunc.staticGeneNewExon``, ``getIsoLength.getIsoLen`` and
       ``geneExonLen.staticGeneNewExon`` are run on the ``NLDMseq.Exon.Split``
       and ``NLDMseq.Gene.Info`` annotation files.
    2. ``plusGeneName.plusGeneName`` is run over ``readFileList``.
    3. For every lane: ``Extract_BowtieData.CalculateProbability`` followed
       by ``Extract_BowtieData.ExtractGeneData``.
    4. For every lane: ``Extract_CalcAbsLoc.CalculateAbsoluteLocation``.
    5. For every lane: ``static_readOnExon.ModelMultiGeneDataScale`` is
       called once per gene listed in ``NLDMseq.Gene.Info``; the genes are
       submitted to a ``pp`` job server in blocks of 5000.

    Args:
        readFileList: One entry per lane.  Its length decides how many lanes
            are processed; the list itself is only forwarded to
            ``plusGeneName.plusGeneName``.
        AnnotationDir: Directory that contains the ``workfloder`` directory.
        ReadLen: Forwarded to ``geneExonLen.staticGeneNewExon`` and
            ``CalculateAbsoluteLocation`` (presumably the read length).
        readType: Forwarded unchanged to the extraction helpers.
        annotation_type: Forwarded to ``plusGeneName.plusGeneName`` only.

    Returns:
        The value returned by ``CalculateProbability`` for the last lane, or
        0 when ``readFileList`` is empty.
    """
    seq_depth = 0
    # Per-lane output names are 1-based: Lane1, Lane2, ...
    lanFile = ['Lane' + str(k + 1) for k in range(len(readFileList))]

    targetDir = AnnotationDir
    workfloder = os.path.join(targetDir, 'workfloder')

    # Annotation files.
    GeneExonSplit = os.path.join(workfloder, 'NLDMseq.Exon.Split')
    GeneFile = os.path.join(workfloder, 'NLDMseq.Gene.Info')
    FileLabel = os.path.join(workfloder, 'NLDMseq.Gene.Label.Isoform')

    # Working directories / files below workfloder.
    geneMapout = os.path.join(workfloder, 'ModelMultiGene_Map')
    exonInputFile = os.path.join(workfloder, 'GeneExonFile')
    exonLenFile = os.path.join(workfloder, 'exonLen')
    InputPath = os.path.join(workfloder, 'readInput')
    GeneData_Path = os.path.join(workfloder, 'ExtractMultiGeneData')
    GeneAbsLoc_path = os.path.join(workfloder, 'ModelMultiGene_AbsLoc')
    DataOutputPath = os.path.join(workfloder, 'ModelMultiGene_Data')
    NormDataOutput = os.path.join(workfloder, 'ModelMultiGene_NormData')

    static_exonJunc.staticGeneNewExon(GeneExonSplit, geneMapout, exonInputFile)
    getIsoLength.getIsoLen(GeneFile, GeneExonSplit, targetDir)  # get isoform length
    geneExonLen.staticGeneNewExon(GeneExonSplit, exonInputFile, exonLenFile, ReadLen)

    plusGeneName.plusGeneName(GeneFile, readFileList, targetDir, readType, annotation_type)  # convert bowtie

    # Gene.Info is tab separated; the columns read here are:
    # gene name, isoform count, a length, then the isoform names.
    gene_list = []
    isoNo_list = []
    length_list = []
    isoName_list = []
    with open(GeneFile, 'r') as gene_info:
        for line in gene_info:
            fields = line.rstrip().split('\t')
            gene_list.append(fields[0])
            isoNo_list.append(int(fields[1]))
            length_list.append(int(fields[2]))
            isoName_list.append(fields[3:])

    # ---- CalculateProbability and ExtractGeneData, one pass per lane ----
    for j, lane in enumerate(lanFile):
        # Inputs under readInput are numbered from 0 (Lane0.plusGene, ...),
        # unlike the 1-based output names in lanFile.  The lane index must
        # be used here; indexing by anything else makes every lane re-read
        # Lane0.
        # NOTE(review): assumes plusGeneName writes one Lane<j>.plusGene per
        # entry of readFileList -- confirm against plusGeneName.
        lane_base = os.path.join(InputPath, 'Lane' + str(j))
        seq_depth = Extract_BowtieData.CalculateProbability(
            lane_base + '.plusGene', lane_base, readType)
        # The file just written by CalculateProbability is the input of
        # ExtractGeneData.
        Extract_BowtieData.ExtractGeneData(
            lane_base, os.path.join(GeneData_Path, lane), GeneFile,
            FileLabel, readType)

    # ---- CalculateAbsoluteLocation, one pass per lane ----
    for lane in lanFile:
        Extract_CalcAbsLoc.CalculateAbsoluteLocation(
            GeneFile, GeneExonSplit,
            os.path.join(GeneData_Path, lane),
            os.path.join(GeneAbsLoc_path, lane),
            readType, ReadLen)

    # Worker run by the pp job server.  Parameter short names:
    #   sgl     -> slice of gene_list
    #   sll     -> slice of length_list
    #   siNol   -> slice of isoNo_list
    #   siNamel -> slice of isoName_list
    #   LocIF   -> LocInputFile
    #   exonInF -> exonInputFile
    #   exonLenF-> exonLenFile
    #   DOPath  -> DataOutputPath
    #   NDOut   -> NormDataOutput
    def pp_work(sgl,sll,siNol,siNamel,LocIF,exonInF,exonLenF,DOPath,NDOut):
        for i in range(len(sgl)):
            static_readOnExon.ModelMultiGeneDataScale(
                sgl[i], sll[i], siNol[i], siNamel[i],
                LocIF, exonInF, exonLenF, DOPath, NDOut)

    # ---- ModelMultiGeneDataScale, genes split into blocks per lane ----
    block_size = 5000
    gene_total = len(gene_list)
    for lane in lanFile:
        LocInputFile = os.path.join(GeneAbsLoc_path, lane)
        job_server = pp.Server()
        try:
            jobs = []
            for starti in range(0, gene_total, block_size):
                # Slicing clamps at the end of the list, so the last block
                # may simply be shorter.
                endi = starti + block_size
                jobs.append(job_server.submit(
                    pp_work,
                    (gene_list[starti:endi],
                     length_list[starti:endi],
                     isoNo_list[starti:endi],
                     isoName_list[starti:endi],
                     LocInputFile,
                     exonInputFile,
                     exonLenFile,
                     DataOutputPath,
                     NormDataOutput),
                    globals=globals()))
            # Calling a job blocks until that job has finished.
            for job in jobs:
                job()
        finally:
            # Always tear the server down, even when a job raised.
            job_server.destroy()
    return (seq_depth)
Ejemplo n.º 2
0
def startProcess(readFileList, annotation_type, AnnotationDir, ReadLen,
                 readType):
    """Build the annotation tables, extract per-lane read data and model it.

    Annotation inputs are read from ``<AnnotationDir>/Annotation``; all
    outputs go below ``<AnnotationDir>/workfloder``.  The steps run strictly
    in this order:

    1. ``static_exonJunc.staticGeneNewExon``, ``getIsonum.getIsonum``,
       ``getIsoLength.getIsoLen`` and ``geneExonLen.staticGeneNewExon`` are
       run on the ``<annotation_type>.Exon.Split`` and
       ``<annotation_type>.Gene.Info`` files.
    2. ``plusGeneName.plusGeneName`` is run over ``readFileList``.
    3. When ``readType`` is ``'Paired'`` or ``'Single'``: for every lane
       ``ExtractGeneBowtieData`` is called, then for every lane
       ``CalculateAbsoluteLocation`` is called, using the ``PE_*`` or
       ``SE_*`` modules respectively.  Any other ``readType`` skips this
       step.
    4. ``static_readOnExon.ModelMultiGeneDataScale`` is called for every
       gene in ``Gene.Info`` and, within each gene, for every lane.

    Args:
        readFileList: One entry per lane.  Its length decides how many lanes
            are processed; the list itself is only forwarded to
            ``plusGeneName.plusGeneName``.
        annotation_type: Prefix of the annotation file names.
        AnnotationDir: Directory containing ``Annotation`` and
            ``workfloder``.
        ReadLen: Forwarded to the helpers (presumably the read length).
        readType: ``'Paired'`` or ``'Single'``; selects the extraction
            modules and is forwarded to ``plusGeneName.plusGeneName``.

    Returns:
        None.
    """
    # Per-lane output names are 1-based: Lane1, Lane2, ...
    lanFile = ['Lane' + str(k + 1) for k in range(len(readFileList))]

    targetDir = AnnotationDir
    workfloder = os.path.join(targetDir, 'workfloder')
    annotationPath = os.path.join(targetDir, 'Annotation')

    # Annotation files.
    GeneExonSplit = os.path.join(annotationPath,
                                 annotation_type + '.Exon.Split')
    GeneFile = os.path.join(annotationPath, annotation_type + '.Gene.Info')

    # Working directories / files below workfloder.
    geneMapout = os.path.join(workfloder, 'ModelMultiGene_Map')
    exonInputFile = os.path.join(workfloder, 'GeneExonFile')
    exonLenFile = os.path.join(workfloder, 'exonLen')
    readNumFile = os.path.join(workfloder, 'seq_depth')
    InputPath = os.path.join(workfloder, 'readInput')
    GeneData_Path = os.path.join(workfloder, 'ExtractMultiGeneData')
    GeneAbsLoc_path = os.path.join(workfloder, 'ModelMultiGene_AbsLoc')
    DataOutputFile = os.path.join(workfloder, 'ModelMultiGene_Data')
    NormDataOutput = os.path.join(workfloder, 'ModelMultiGene_NormData')

    static_exonJunc.staticGeneNewExon(GeneExonSplit, geneMapout,
                                      exonInputFile)
    getIsonum.getIsonum(GeneExonSplit, targetDir)
    getIsoLength.getIsoLen(GeneFile, GeneExonSplit,
                           targetDir)  # get isoform length
    geneExonLen.staticGeneNewExon(GeneExonSplit, exonInputFile, exonLenFile,
                                  ReadLen)

    plusGeneName.plusGeneName(GeneFile, readFileList, targetDir, readType,
                              ReadLen)  # convert bowtie

    # Gene.Info is tab separated; the columns read here are:
    # gene name, isoform count, a length, then the isoform names.
    # It is parsed once, up front, so malformed lines fail before any of
    # the extraction work starts.
    gene_records = []
    with open(GeneFile, 'r') as gene_info:
        for line in gene_info:
            fields = line.rstrip().split('\t')
            gene_records.append(
                (fields[0], int(fields[1]), int(fields[2]), fields[3:]))

    # The paired-end and single-end paths differ only in the modules used.
    if readType == 'Paired':
        extractor = PE_Extract_BowtieData
        locator = PE_Extract_CalcAbsLoc_my
    elif readType == 'Single':
        extractor = SE_Extract_BowtieData
        locator = SE_Extract_CalcAbsLoc_my
    else:
        # Unknown read type: no extraction is performed.
        extractor = locator = None

    if extractor is not None:
        for j, lane in enumerate(lanFile):
            # Inputs under readInput are numbered from 0, outputs from 1.
            InputFile = os.path.join(InputPath, 'Lane' + str(j) + '.plusGene')
            OutputFile = os.path.join(GeneData_Path, lane)
            # print(x) with one argument prints the same text on Python 2
            # and Python 3.
            print(InputFile)
            print(OutputFile)
            extractor.ExtractGeneBowtieData(InputFile, GeneFile, OutputFile,
                                            readNumFile)
        for lane in lanFile:
            CalLocFileInPath = os.path.join(GeneData_Path, lane)
            CalLocFileOutPath = os.path.join(GeneAbsLoc_path, lane)
            print(CalLocFileInPath)
            print(CalLocFileOutPath)
            locator.CalculateAbsoluteLocation(GeneFile, GeneExonSplit,
                                              CalLocFileInPath,
                                              CalLocFileOutPath, ReadLen)

    # Per-lane location files do not depend on the gene, so build them once.
    lane_loc_files = [os.path.join(GeneAbsLoc_path, lane) for lane in lanFile]
    for gene, isoNo, length, isoName in gene_records:
        for j, LocInputFile in enumerate(lane_loc_files):
            static_readOnExon.ModelMultiGeneDataScale(
                j, gene, length, isoNo, isoName, LocInputFile, exonInputFile,
                exonLenFile, DataOutputFile, NormDataOutput)
Ejemplo n.º 3
0
# Make sure the isoform-map output directory exists.
if not os.path.exists(mapFile):
    os.mkdir(mapFile)

# Exon output directory: <targetDir>/workfloder/GeneExonFile.
GeneExonFile = os.path.join(targetDir, 'workfloder', 'GeneExonFile')
if not os.path.exists(GeneExonFile):
    os.mkdir(GeneExonFile)

geneMapout = mapFile
geneExonout = GeneExonFile

# get isoform Map
get_map.mapout(TargetGeneExonSplitFile, geneMapout, geneExonout)

# get isoform length
getIsoLength.getIsoLen(TargetGeneFile, TargetGeneExonSplitFile)

# Exon-length output directory: <targetDir>/workfloder/exonLen.
exonLen = os.path.join(targetDir, 'workfloder', 'exonLen')
if not os.path.exists(exonLen):
    os.mkdir(exonLen)

ReadLen = 100
# These two resolve to the same locations as GeneExonFile and exonLen.
exonjunfile = os.path.join(targetDir, 'workfloder', 'GeneExonFile')
spandExonsLen = os.path.join(targetDir, 'workfloder', 'exonLen')

SE_geneExonLen.staticGeneNewExon(TargetGeneExonSplitFile, spandExonsLen)