def startProcess(readFileList,AnnotationDir,ReadLen,readType,annotation_type):
    """Run the pre-processing pipeline for every lane and return the read depth.

    The steps, in order, are:

    1. build the exon/junction, isoform-length and exon-length annotation
       products (static_exonJunc, getIsoLength, geneExonLen);
    2. tag the aligned reads with gene names (plusGeneName);
    3. load the per-gene table from 'NLDMseq.Gene.Info';
    4. per lane: Extract_BowtieData.CalculateProbability followed by
       Extract_BowtieData.ExtractGeneData;
    5. per lane: Extract_CalcAbsLoc.CalculateAbsoluteLocation;
    6. per lane: static_readOnExon.ModelMultiGeneDataScale for every gene,
       submitted to a Parallel Python server in chunks of 5000 genes.

    Args:
        readFileList: one entry per lane.  Only its length is used here; the
            list itself is forwarded to plusGeneName.plusGeneName.
        AnnotationDir: directory that contains the 'workfloder' directory.
        ReadLen: forwarded to geneExonLen.staticGeneNewExon and
            Extract_CalcAbsLoc.CalculateAbsoluteLocation.
        readType: forwarded to plusGeneName and the Extract_* helpers.
        annotation_type: forwarded to plusGeneName.plusGeneName.

    Returns:
        The value returned by the last call to
        Extract_BowtieData.CalculateProbability, or 0 when readFileList is
        empty.
    """
    seq_depth = 0
    #Sample = ['Brain','UHR']
    Sample = ['workfloder']
    lanFile = ['Lane' + str(n + 1) for n in range(len(readFileList))]

    targetDir = AnnotationDir
    workfloder = os.path.join(targetDir, 'workfloder')

    # Annotation files
    GeneExonSplit = os.path.join(workfloder, 'NLDMseq.Exon.Split')
    GeneFile = os.path.join(workfloder, 'NLDMseq.Gene.Info')
    FileLabel = os.path.join(workfloder, 'NLDMseq.Gene.Label.Isoform')

    # Work directories / files under 'workfloder'
    geneMapout = os.path.join(workfloder, 'ModelMultiGene_Map')
    exonInputFile = os.path.join(workfloder, 'GeneExonFile')
    exonLenFile = os.path.join(workfloder, 'exonLen')
    GeneData_Path = os.path.join(workfloder, 'ExtractMultiGeneData')
    GeneAbsLoc_path = os.path.join(workfloder, 'ModelMultiGene_AbsLoc')
    DataOutputPath = os.path.join(workfloder, 'ModelMultiGene_Data')
    NormDataOutput = os.path.join(workfloder, 'ModelMultiGene_NormData')
    InputPath = os.path.join(workfloder, 'readInput')

    static_exonJunc.staticGeneNewExon(GeneExonSplit, geneMapout, exonInputFile)
    getIsoLength.getIsoLen(GeneFile, GeneExonSplit, targetDir)  # get isoform length
    geneExonLen.staticGeneNewExon(GeneExonSplit, exonInputFile, exonLenFile, ReadLen)
    plusGeneName.plusGeneName(GeneFile, readFileList, targetDir, readType,
                              annotation_type)  # convert bowtie

    # Gene table columns (tab separated): gene, isoform count, length,
    # isoform names...
    gene_list = []
    isoNo_list = []
    length_list = []
    isoName_list = []
    with open(GeneFile, 'r') as gene_fh:
        for record in gene_fh:
            fields = record.rstrip().split('\t')
            gene_list.append(fields[0])
            isoNo_list.append(int(fields[1]))
            length_list.append(int(fields[2]))
            isoName_list.append(fields[3:])

    ###########CalculateProbability and ExtractGeneData
    for sample_idx in range(len(Sample)):
        # NOTE(review): the input name is built from the sample index, not the
        # lane index, so every lane reads 'Lane0.plusGene'.  Confirm against
        # the files plusGeneName.plusGeneName writes before changing this.
        InputFile = os.path.join(InputPath, 'Lane' + str(sample_idx) + '.plusGene')
        OutputFile = os.path.join(InputPath, 'Lane' + str(sample_idx))
        for lane in lanFile:
            seq_depth = Extract_BowtieData.CalculateProbability(
                InputFile, OutputFile, readType)
            Extract_BowtieData.ExtractGeneData(
                OutputFile, os.path.join(GeneData_Path, lane),
                GeneFile, FileLabel, readType)

    ##################CalculateAbsoluteLocation
    for lane in lanFile:
        Extract_CalcAbsLoc.CalculateAbsoluteLocation(
            GeneFile, GeneExonSplit,
            os.path.join(GeneData_Path, lane),
            os.path.join(GeneAbsLoc_path, lane),
            readType, ReadLen)

    #function pp_work parameter list and short names:
    # sub_gene_list    ----->sgl
    # sub_length_list  ----->sll
    # sub_isoNo_list   ----->siNol
    # sub_isoName_list ----->siNamel
    # LocInputFile     ----->LocIF
    # exonInputFile    ----->exonInF
    # exonLenFile      ----->exonLenF
    # DataOutputPath   ----->DOPath
    # NormDataOutput   ----->NDOut
    def pp_work(sgl,sll,siNol,siNamel,LocIF,exonInF,exonLenF,DOPath,NDOut):
        # Kept free of closure variables: it is handed to job_server.submit
        # and only uses its own arguments plus module-level names.
        for gene, length, isoNo, isoName in zip(sgl, sll, siNol, siNamel):
            static_readOnExon.ModelMultiGeneDataScale(
                gene, length, isoNo, isoName, LocIF, exonInF,
                exonLenF, DOPath, NDOut)

    block_size = 5000
    gene_total = len(gene_list)
    for _sample in Sample:
        for lane in lanFile:
            LocInputFile = os.path.join(GeneAbsLoc_path, lane)
            job_server = pp.Server()
            try:
                jobs = []
                for starti in range(0, gene_total, block_size):
                    endi = min(starti + block_size, gene_total)
                    jobs.append(job_server.submit(
                        pp_work,
                        (gene_list[starti:endi],
                         length_list[starti:endi],
                         isoNo_list[starti:endi],
                         isoName_list[starti:endi],
                         LocInputFile,
                         exonInputFile,
                         exonLenFile,
                         DataOutputPath,
                         NormDataOutput),
                        globals=globals()))
                # Calling a job blocks until that chunk has finished.
                for job in jobs:
                    job()
            finally:
                # Release the server even when a chunk fails.
                job_server.destroy()
    return seq_depth
def startProcess(readFileList, annotation_type, AnnotationDir, ReadLen, readType):
    """Run the pre-processing pipeline for every lane.

    The steps, in order, are:

    1. build the annotation products (static_exonJunc, getIsonum,
       getIsoLength, geneExonLen);
    2. tag the aligned reads with gene names (plusGeneName);
    3. per lane: ExtractGeneBowtieData, then per lane:
       CalculateAbsoluteLocation, using the PE_* modules when readType is
       'Paired' and the SE_* modules when it is 'Single';
    4. for every line of '<annotation_type>.Gene.Info' and every lane, call
       static_readOnExon.ModelMultiGeneDataScale.

    Args:
        readFileList: one entry per lane.  Only its length is used here; the
            list itself is forwarded to plusGeneName.plusGeneName.
        annotation_type: prefix of the '.Exon.Split' and '.Gene.Info' files
            inside '<AnnotationDir>/Annotation'.
        AnnotationDir: directory that contains the 'Annotation' and
            'workfloder' directories.
        ReadLen: forwarded to geneExonLen, plusGeneName and
            CalculateAbsoluteLocation.
        readType: 'Paired' or 'Single'; also forwarded to plusGeneName.

    Returns:
        None.
    """
    lanFile = ['Lane' + str(n + 1) for n in range(len(readFileList))]

    targetDir = AnnotationDir
    workfloder = os.path.join(targetDir, 'workfloder')
    annotationPath = os.path.join(targetDir, 'Annotation')

    # Annotation files
    GeneExonSplit = os.path.join(annotationPath, annotation_type + '.Exon.Split')
    GeneFile = os.path.join(annotationPath, annotation_type + '.Gene.Info')

    # Work directories / files under 'workfloder'
    geneMapout = os.path.join(workfloder, 'ModelMultiGene_Map')
    exonInputFile = os.path.join(workfloder, 'GeneExonFile')
    exonLenFile = os.path.join(workfloder, 'exonLen')
    GeneData_Path = os.path.join(workfloder, 'ExtractMultiGeneData')
    GeneAbsLoc_path = os.path.join(workfloder, 'ModelMultiGene_AbsLoc')
    DataOutputFile = os.path.join(workfloder, 'ModelMultiGene_Data')
    NormDataOutput = os.path.join(workfloder, 'ModelMultiGene_NormData')
    InputPath = os.path.join(workfloder, 'readInput')
    readNumFile = os.path.join(workfloder, 'seq_depth')

    static_exonJunc.staticGeneNewExon(GeneExonSplit, geneMapout, exonInputFile)
    getIsonum.getIsonum(GeneExonSplit, targetDir)
    getIsoLength.getIsoLen(GeneFile, GeneExonSplit, targetDir)  # get isoform length
    geneExonLen.staticGeneNewExon(GeneExonSplit, exonInputFile, exonLenFile, ReadLen)
    plusGeneName.plusGeneName(GeneFile, readFileList, targetDir, readType,
                              ReadLen)  # convert bowtie

    # Pick the module pair lazily so only the names needed for this read type
    # are looked up.
    if readType == 'Paired':
        extractor = PE_Extract_BowtieData
        locator = PE_Extract_CalcAbsLoc_my
    elif readType == 'Single':
        extractor = SE_Extract_BowtieData
        locator = SE_Extract_CalcAbsLoc_my
    else:
        # NOTE(review): any other readType skips extraction and location
        # without an error; confirm whether that is intended.
        extractor = None
        locator = None

    if extractor is not None:
        # Input files are numbered from 0, output lanes from 1.
        for j, lane in enumerate(lanFile):
            InputFile = os.path.join(InputPath, 'Lane' + str(j) + '.plusGene')
            OutputFile = os.path.join(GeneData_Path, lane)
            print(InputFile)
            print(OutputFile)
            extractor.ExtractGeneBowtieData(
                InputFile, GeneFile, OutputFile, readNumFile)
        for lane in lanFile:
            CalLocFileInPath = os.path.join(GeneData_Path, lane)
            CalLocFileOutPath = os.path.join(GeneAbsLoc_path, lane)
            print(CalLocFileInPath)
            print(CalLocFileOutPath)
            locator.CalculateAbsoluteLocation(
                GeneFile, GeneExonSplit, CalLocFileInPath, CalLocFileOutPath,
                ReadLen)

    # Per-lane location files do not depend on the gene, so build them once.
    lane_loc_files = [os.path.join(GeneAbsLoc_path, lane) for lane in lanFile]

    # Gene table columns (tab separated): gene, isoform count, length,
    # isoform names...
    with open(GeneFile, 'r') as f_in:
        for record in f_in:
            fields = record.rstrip().split('\t')
            gene = fields[0]
            isoNo = int(fields[1])
            length = int(fields[2])
            isoName = fields[3:]
            for j, LocInputFile in enumerate(lane_loc_files):
                static_readOnExon.ModelMultiGeneDataScale(
                    j, gene, length, isoNo, isoName, LocInputFile,
                    exonInputFile, exonLenFile, DataOutputFile, NormDataOutput)
# Create the isoform-map output directory if it is missing.
if not os.path.exists(mapFile):
    os.mkdir(mapFile)

# Create <targetDir>/workfloder/GeneExonFile if it is missing.
GeneExonFile = os.path.join(targetDir, 'workfloder', 'GeneExonFile')
if not os.path.exists(GeneExonFile):
    os.mkdir(GeneExonFile)

geneMapout = mapFile
geneExonout = GeneExonFile
get_map.mapout(TargetGeneExonSplitFile, geneMapout, geneExonout)  # get isoform Map
getIsoLength.getIsoLen(TargetGeneFile, TargetGeneExonSplitFile)  # get isoform length

# Create <targetDir>/workfloder/exonLen if it is missing.
exonLen = os.path.join(targetDir, 'workfloder', 'exonLen')
if not os.path.exists(exonLen):
    os.mkdir(exonLen)

ReadLen = 100
exonjunfile = os.path.join(targetDir, 'workfloder', 'GeneExonFile')
spandExonsLen = os.path.join(targetDir, 'workfloder', 'exonLen')
SE_geneExonLen.staticGeneNewExon(TargetGeneExonSplitFile, spandExonsLen)