def loadExonH():
    """Collect the distinct exons of all refFlat and Kg transcripts by chromosome.

    Both annotation tables are read through ``mygenome`` and the ``exnList``
    entries of every transcript are pooled under the transcript's chromosome.

    Returns:
        dict mapping each chromosome key to a list of its distinct exon
        entries, ordered by the entry's first element and then by its
        second element.

    Note:
        De-duplication and sorting are applied to every chromosome that
        appears in either table, including chromosomes present in only one
        of them.
    """
    exonH = {}

    # Load and drain one table at a time so only one is being walked at once.
    for loadByChr in (mygenome.loadRefFlatByChr, mygenome.loadKgByChr):
        annotH = loadByChr()
        for chrom in annotH:
            exonL = exonH.setdefault(chrom, [])
            for tH in annotH[chrom]:
                exonL.extend(tH['exnList'])

    # A single key-based sort replaces the two stable cmp-style sorts
    # (second element first, then first element); the resulting order is the
    # same and it works on both Python 2 and Python 3.
    for chrom in exonH:
        exonH[chrom] = sorted(set(exonH[chrom]),
                              key=lambda exn: (exn[0], exn[1]))

    return exonH
def loadExonH():
    """Collect the distinct exons of all refFlat and Kg transcripts by chromosome.

    Both annotation tables are read through ``mygenome`` and the ``exnList``
    entries of every transcript are pooled under the transcript's chromosome.

    Returns:
        dict mapping each chromosome key to a list of its distinct exon
        entries, ordered by the entry's first element and then by its
        second element.

    Note:
        De-duplication and sorting are applied to every chromosome that
        appears in either table, including chromosomes present in only one
        of them.
    """
    exonH = {}

    # Load and drain one table at a time so only one is being walked at once.
    for loadByChr in (mygenome.loadRefFlatByChr, mygenome.loadKgByChr):
        annotH = loadByChr()
        for chrom in annotH:
            exonL = exonH.setdefault(chrom, [])
            for tH in annotH[chrom]:
                exonL.extend(tH['exnList'])

    # A single key-based sort replaces the two stable cmp-style sorts
    # (second element first, then first element); the resulting order is the
    # same and it works on both Python 2 and Python 3.
    for chrom in exonH:
        exonH[chrom] = sorted(set(exonH[chrom]),
                              key=lambda exn: (exn[0], exn[1]))

    return exonH
def genKgCompositeModel(outTextFileName, outFaFileName):
    """Write composite transcript models built from ``mygenome.loadKgByChr()``.

    For chr1..chr22, chrX, chrY and chrM, the transcripts of each strand are
    turned into loci and passed through ``mygenome.mergeLoci``. For every
    resulting locus the exons of its member transcripts are merged the same
    way and two records are written.

    Args:
        outTextFileName: path of the tab-separated model file. Each line is
            id, chrom, strand, start, end, exon count, comma-joined exon
            starts, comma-joined exon ends.
        outFaFileName: path of the sequence file. Each record is a
            ``>id|chrom|strand|start|end`` header followed by the
            ``nibFrag()`` output of the whole locus with its strand forced
            to '+'.

    Raises:
        KeyError: if one of the chromosomes above is missing from the table.
    """
    def byPosition(loc):
        # Equivalent to the former pair of stable sorts (end, then start).
        return (loc.chrSta, loc.chrEnd)

    kgH = mygenome.loadKgByChr()
    # list() keeps the concatenation valid on Python 3, where range is lazy.
    chromL = ['chr%s' % c for c in list(range(1, 23)) + ['X', 'Y', 'M']]

    # Context managers close both files even if a mygenome call raises.
    with open(outTextFileName, 'w') as outTextFile, \
            open(outFaFileName, 'w') as outFaFile:
        for chrom in chromL:
            kgL = kgH[chrom]

            # Merge transcripts strand by strand so opposite strands never mix.
            txnLocusL = []
            for strand in ('+', '-'):
                strandLocusL = [
                    mygenome.locus(
                        '%s:%s-%s%s' % (chrom, h['txnSta'], h['txnEnd'], strand),
                        h['kgId'])
                    for h in kgL if h['strand'] == strand
                ]
                txnLocusL += mygenome.mergeLoci(strandLocusL)
            txnLocusL.sort(key=byPosition)

            for txnLoc in txnLocusL:
                # NOTE(review): this assumes txnLoc.id is a collection of the
                # merged kgIds; if mergeLoci yields a joined string this is a
                # substring match - confirm against mygenome.
                exnLocusL = []
                for h in kgL:
                    if h['kgId'] not in txnLoc.id:
                        continue
                    for (exnSta, exnEnd) in h['exnList']:
                        exnLocusL.append(mygenome.locus(
                            '%s:%s-%s%s' % (chrom, exnSta, exnEnd, h['strand'])))
                exnLocusL.sort(key=byPosition)
                exnLocusL = mygenome.mergeLoci(exnLocusL)

                outTextFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
                    txnLoc.id, txnLoc.chrom, txnLoc.strand,
                    txnLoc.chrSta, txnLoc.chrEnd, len(exnLocusL),
                    ','.join([str(exnLoc.chrSta) for exnLoc in exnLocusL]),
                    ','.join([str(exnLoc.chrEnd) for exnLoc in exnLocusL])))

                outFaFile.write('>%s|%s|%s|%s|%s\n' % (
                    txnLoc.id, txnLoc.chrom, txnLoc.strand,
                    txnLoc.chrSta, txnLoc.chrEnd))

                # The whole locus is emitted on the positive strand; work on a
                # copy so the locus in the list keeps its real strand.
                txnLocCopy = copy.deepcopy(txnLoc)
                txnLocCopy.strand = '+'
                outFaFile.write(txnLocCopy.nibFrag())
                outFaFile.write('\n')
def genCompositeModel(outTextFileName, outFaFileName, intronSize=100):
    """Write composite gene models for the genes listed in ``mygenome.TK``.

    For chr1..chr22, chrX, chrY and chrM, the records from
    ``mygenome.loadKgByChr()`` are restricted to genes whose ``geneName`` is
    in ``mygenome.TK``. Their transcripts are merged per strand with
    ``mygenome.mergeLoci``, and for every resulting locus the exons of its
    member transcripts are merged the same way and two records are written.

    Args:
        outTextFileName: path of the tab-separated model file. Each line is
            id, chrom, strand, start, end, exon count, comma-joined exon
            starts, comma-joined exon ends.
        outFaFileName: path of the sequence file. Each record is a
            ``>id|chrom|strand|start|end`` header followed by the
            concatenated ``nibFrag()`` output of every merged exon, each
            taken on the '+' strand and widened into the flanking introns.
        intronSize: maximum number of positions an exon is extended on each
            side. The extension is also capped at half the distance to the
            neighbouring exon, and the outer edges of the first and last
            exon are not extended.

    Raises:
        KeyError: if one of the chromosomes above is missing from the table.
    """
    def byPosition(loc):
        # Equivalent to the former pair of stable sorts (end, then start).
        return (loc.chrSta, loc.chrEnd)

    geneNameH = mygenome.geneNameH()
    geneSetH = mygenome.geneSetH()
    geneInfoH = mygenome.geneInfoH(geneNameH, geneSetH)
    geneH = mygenome.loadKgByChr()

    # list() keeps the concatenation valid on Python 3, where range is lazy.
    chromL = ['chr%s' % c for c in list(range(1, 23)) + ['X', 'Y', 'M']]

    # Context managers close both files even if a mygenome call raises.
    with open(outTextFileName, 'w') as outTextFile, \
            open(outFaFileName, 'w') as outFaFile:
        for chrom in chromL:
            # Materialised as a list because it is walked once per strand and
            # once per merged locus; a lazy filter() would be spent after the
            # first pass on Python 3.
            geneH_byChr = [
                h for h in geneH[chrom]
                if mygenome.gene(h['geneId'], geneNameH, geneSetH,
                                 geneInfoH).geneName in mygenome.TK
            ]

            # Merge transcripts strand by strand so opposite strands never mix.
            txnLocusL = []
            for strand in ('+', '-'):
                strandLocusL = [
                    mygenome.locus(
                        '%s:%s-%s%s' % (chrom, h['txnSta'], h['txnEnd'], strand),
                        h['geneId'])
                    for h in geneH_byChr if h['strand'] == strand
                ]
                txnLocusL += mygenome.mergeLoci(strandLocusL)
            txnLocusL.sort(key=byPosition)

            for txnLoc in txnLocusL:
                # NOTE(review): this assumes txnLoc.id is a collection of the
                # merged geneIds; if mergeLoci yields a joined string this is
                # a substring match - confirm against mygenome.
                exnLocusL = []
                for h in geneH_byChr:
                    if h['geneId'] not in txnLoc.id:
                        continue
                    for (exnSta, exnEnd) in h['exnList']:
                        exnLocusL.append(mygenome.locus(
                            '%s:%s-%s%s' % (chrom, exnSta, exnEnd, h['strand'])))
                exnLocusL.sort(key=byPosition)
                exnLocusL = mygenome.mergeLoci(exnLocusL)

                outTextFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
                    txnLoc.id, txnLoc.chrom, txnLoc.strand,
                    txnLoc.chrSta, txnLoc.chrEnd, len(exnLocusL),
                    ','.join([str(exnLoc.chrSta) for exnLoc in exnLocusL]),
                    ','.join([str(exnLoc.chrEnd) for exnLoc in exnLocusL])))

                outFaFile.write('>%s|%s|%s|%s|%s\n' % (
                    txnLoc.id, txnLoc.chrom, txnLoc.strand,
                    txnLoc.chrSta, txnLoc.chrEnd))

                lastIdx = len(exnLocusL) - 1
                for i, exnLoc in enumerate(exnLocusL):
                    # Pad a copy so neighbouring gaps are still measured
                    # against the unpadded exon boundaries.
                    exnLocCopy = copy.deepcopy(exnLoc)
                    exnLocCopy.strand = '+'
                    if i > 0:
                        gap = exnLoc.chrSta - exnLocusL[i - 1].chrEnd
                        # '//' keeps the Python 2 integer-division result.
                        exnLocCopy.chrSta -= min(intronSize, gap // 2)
                    if i < lastIdx:
                        gap = exnLocusL[i + 1].chrSta - exnLoc.chrEnd
                        exnLocCopy.chrEnd += min(intronSize, gap // 2)
                    outFaFile.write(exnLocCopy.nibFrag())
                outFaFile.write('\n')
def genKgCompositeModel(outTextFileName, outFaFileName):
    """Write composite transcript models built from ``mygenome.loadKgByChr()``.

    For chr1..chr22, chrX, chrY and chrM, the transcripts of each strand are
    turned into loci and passed through ``mygenome.mergeLoci``. For every
    resulting locus the exons of its member transcripts are merged the same
    way and two records are written.

    Args:
        outTextFileName: path of the tab-separated model file. Each line is
            id, chrom, strand, start, end, exon count, comma-joined exon
            starts, comma-joined exon ends.
        outFaFileName: path of the sequence file. Each record is a
            ``>id|chrom|strand|start|end`` header followed by the
            ``nibFrag()`` output of the whole locus with its strand forced
            to '+'.

    Raises:
        KeyError: if one of the chromosomes above is missing from the table.
    """
    def byPosition(loc):
        # Equivalent to the former pair of stable sorts (end, then start).
        return (loc.chrSta, loc.chrEnd)

    kgH = mygenome.loadKgByChr()
    # list() keeps the concatenation valid on Python 3, where range is lazy.
    chromL = ['chr%s' % c for c in list(range(1, 23)) + ['X', 'Y', 'M']]

    # Context managers close both files even if a mygenome call raises.
    with open(outTextFileName, 'w') as outTextFile, \
            open(outFaFileName, 'w') as outFaFile:
        for chrom in chromL:
            kgL = kgH[chrom]

            # Merge transcripts strand by strand so opposite strands never mix.
            txnLocusL = []
            for strand in ('+', '-'):
                strandLocusL = [
                    mygenome.locus(
                        '%s:%s-%s%s' % (chrom, h['txnSta'], h['txnEnd'], strand),
                        h['kgId'])
                    for h in kgL if h['strand'] == strand
                ]
                txnLocusL += mygenome.mergeLoci(strandLocusL)
            txnLocusL.sort(key=byPosition)

            for txnLoc in txnLocusL:
                # NOTE(review): this assumes txnLoc.id is a collection of the
                # merged kgIds; if mergeLoci yields a joined string this is a
                # substring match - confirm against mygenome.
                exnLocusL = []
                for h in kgL:
                    if h['kgId'] not in txnLoc.id:
                        continue
                    for (exnSta, exnEnd) in h['exnList']:
                        exnLocusL.append(mygenome.locus(
                            '%s:%s-%s%s' % (chrom, exnSta, exnEnd, h['strand'])))
                exnLocusL.sort(key=byPosition)
                exnLocusL = mygenome.mergeLoci(exnLocusL)

                outTextFile.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
                    txnLoc.id, txnLoc.chrom, txnLoc.strand,
                    txnLoc.chrSta, txnLoc.chrEnd, len(exnLocusL),
                    ','.join([str(exnLoc.chrSta) for exnLoc in exnLocusL]),
                    ','.join([str(exnLoc.chrEnd) for exnLoc in exnLocusL])))

                outFaFile.write('>%s|%s|%s|%s|%s\n' % (
                    txnLoc.id, txnLoc.chrom, txnLoc.strand,
                    txnLoc.chrSta, txnLoc.chrEnd))

                # The whole locus is emitted on the positive strand; work on a
                # copy so the locus in the list keeps its real strand.
                txnLocCopy = copy.deepcopy(txnLoc)
                txnLocCopy.strand = '+'
                outFaFile.write(txnLocCopy.nibFrag())
                outFaFile.write('\n')