Esempio n. 1
0
def loadExonH(refFlatH=None, kgH=None):
    """Collect the unique exons of every chromosome, sorted by position.

    Exons are gathered from two annotation tables. Each table maps a
    chromosome name to a list of transcript records, and every record
    carries its exons under the "exnList" key. Exon entries must be
    hashable and indexable; they are presumably (start, end) tuples --
    confirm against mygenome.

    Args:
        refFlatH: optional pre-loaded table. When None it is obtained
            from mygenome.loadRefFlatByChr().
        kgH: optional pre-loaded table. When None it is obtained from
            mygenome.loadKgByChr().

    Returns:
        A dict mapping chromosome name to a duplicate-free list of exons
        ordered by element 0, with ties broken by element 1.
    """
    if refFlatH is None:
        refFlatH = mygenome.loadRefFlatByChr()
    if kgH is None:
        kgH = mygenome.loadKgByChr()

    exonH = {}

    for annotH in (refFlatH, kgH):
        for chrom in annotH:
            exonL = exonH.setdefault(chrom, [])
            for tH in annotH[chrom]:
                exonL.extend(tH["exnList"])

    # Normalise every chromosome, not only those present in kgH, so that
    # chromosomes annotated in refFlat alone are deduplicated and sorted
    # as well. A single composite key gives the same order as sorting by
    # element 1 and then stably by element 0.
    for chrom in exonH:
        exonH[chrom] = sorted(
            set(exonH[chrom]), key=lambda exn: (exn[0], exn[1]))

    return exonH
Esempio n. 2
0
def loadExonH(refFlatH=None, kgH=None):
    """Return, per chromosome, the sorted list of distinct exons.

    Two annotation tables are combined. Each maps a chromosome name to a
    list of transcript records whose 'exnList' entry holds that
    transcript's exons. Exon entries must be hashable and indexable;
    they are presumably (start, end) tuples -- confirm against mygenome.

    Args:
        refFlatH: optional pre-loaded table. When None it is obtained
            from mygenome.loadRefFlatByChr().
        kgH: optional pre-loaded table. When None it is obtained from
            mygenome.loadKgByChr().

    Returns:
        A dict mapping chromosome name to a list of exons without
        duplicates, ordered by element 0 and then by element 1.
    """
    if refFlatH is None:
        refFlatH = mygenome.loadRefFlatByChr()
    if kgH is None:
        kgH = mygenome.loadKgByChr()

    exonH = {}

    for tableH in (refFlatH, kgH):
        for chrom in tableH:
            chromExonL = exonH.setdefault(chrom, [])
            for tH in tableH[chrom]:
                chromExonL.extend(tH['exnList'])

    # Deduplicate and sort all chromosomes, including those that appear
    # only in refFlatH. The composite key reproduces the order of a sort
    # on element 1 followed by a stable sort on element 0.
    for chrom in exonH:
        exonH[chrom] = sorted(
            set(exonH[chrom]), key=lambda exn: (exn[0], exn[1]))

    return exonH
Esempio n. 3
0
def genKgCompositeModel(outTextFileName, outFaFileName):
    """Write merged ("composite") transcript models for chr1-22, X, Y, M.

    The records returned by mygenome.loadKgByChr() are turned into loci
    and merged per strand with mygenome.mergeLoci(). For every merged
    locus, the exons of the records whose 'kgId' is contained in the
    locus id are merged too.

    Args:
        outTextFileName: path of the tab-separated output. One line per
            merged locus: id, chromosome, strand, start, end, number of
            merged exons, comma-joined exon starts, comma-joined exon
            ends.
        outFaFileName: path of the sequence output. Each merged locus
            gets a '>id|chrom|strand|start|end' header line followed by
            the string returned by nibFrag() for the whole locus with
            its strand forced to '+' (presumably the genomic sequence --
            confirm against mygenome.locus).
    """
    kgH = mygenome.loadKgByChr()

    def byPosition(loc):
        # Order by start coordinate, ties broken by end coordinate.
        return (loc.chrSta, loc.chrEnd)

    # 'with' guarantees both files are closed even if an error
    # interrupts the run part-way through.
    with open(outTextFileName, 'w') as outTextFile, \
            open(outFaFileName, 'w') as outFaFile:

        # list(...) keeps the concatenation valid where range() is lazy.
        for chrNum in list(range(1, 23)) + ['X', 'Y', 'M']:

            chrom = 'chr%s' % chrNum
            kgL = kgH[chrom]

            txnLocusL_combined = []

            for strand in ['+', '-']:
                txnLocusL = [
                    mygenome.locus(
                        '%s:%s-%s%s' % (chrom, h['txnSta'], h['txnEnd'],
                                        strand),
                        h['kgId'])
                    for h in kgL if h['strand'] == strand
                ]
                txnLocusL_combined += mygenome.mergeLoci(txnLocusL)

            txnLocusL_combined.sort(key=byPosition)

            for txnLoc in txnLocusL_combined:

                exnLocusL = []

                for h in kgL:
                    if h['kgId'] not in txnLoc.id:
                        continue
                    for (exnSta, exnEnd) in h['exnList']:
                        exnLocusL.append(
                            mygenome.locus(
                                '%s:%s-%s%s' %
                                (chrom, exnSta, exnEnd, h['strand'])))

                exnLocusL.sort(key=byPosition)
                exnLocusL = mygenome.mergeLoci(exnLocusL)

                exnStaL = [str(exnLoc.chrSta) for exnLoc in exnLocusL]
                exnEndL = [str(exnLoc.chrEnd) for exnLoc in exnLocusL]

                outTextFile.write(
                    '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                    (txnLoc.id, txnLoc.chrom, txnLoc.strand, txnLoc.chrSta,
                     txnLoc.chrEnd, len(exnLocusL), ','.join(exnStaL),
                     ','.join(exnEndL)))

                outFaFile.write(
                    '>%s|%s|%s|%s|%s\n' %
                    (txnLoc.id, txnLoc.chrom, txnLoc.strand, txnLoc.chrSta,
                     txnLoc.chrEnd))

                # Work on a copy so the merged locus keeps its own strand
                # while the whole span is fetched on the positive strand.
                txnLocCopy = copy.deepcopy(txnLoc)
                txnLocCopy.strand = '+'

                outFaFile.write(txnLocCopy.nibFrag())
                outFaFile.write('\n')
Esempio n. 4
0
def genCompositeModel(outTextFileName, outFaFileName, intronSize=100):
    """Write composite models for genes whose name is in mygenome.TK.

    For each of chr1-22, X, Y and M, the records from
    mygenome.loadKgByChr() are restricted to those whose gene name (as
    resolved by mygenome.gene(...).geneName) is a member of mygenome.TK.
    The surviving records are merged per strand with
    mygenome.mergeLoci(), and the exons of each merged locus are merged
    as well.

    Args:
        outTextFileName: path of the tab-separated output. One line per
            merged locus: id, chromosome, strand, start, end, number of
            merged exons, comma-joined exon starts, comma-joined exon
            ends.
        outFaFileName: path of the sequence output. Each merged locus
            gets a '>id|chrom|strand|start|end' header line, then the
            concatenated nibFrag() strings of its merged exons (strand
            forced to '+'), each exon widened by its flanks.
        intronSize: maximum number of positions by which an exon is
            widened towards a neighbouring exon. The widening is capped
            at half the gap to that neighbour, and the outer side of the
            first and last exon is not widened.
    """
    geneNameH = mygenome.geneNameH()
    geneSetH = mygenome.geneSetH()
    geneInfoH = mygenome.geneInfoH(geneNameH, geneSetH)

    geneH = mygenome.loadKgByChr()

    def byPosition(loc):
        # Order by start coordinate, ties broken by end coordinate.
        return (loc.chrSta, loc.chrEnd)

    # 'with' guarantees both files are closed even if an error
    # interrupts the run part-way through.
    with open(outTextFileName, 'w') as outTextFile, \
            open(outFaFileName, 'w') as outFaFile:

        # list(...) keeps the concatenation valid where range() is lazy.
        for chrNum in list(range(1, 23)) + ['X', 'Y', 'M']:

            chrom = 'chr%s' % chrNum

            # Built as a real list because it is traversed once per
            # strand and again for every merged locus; a one-shot
            # iterator would be empty after the first pass.
            geneH_byChr = [
                h for h in geneH[chrom]
                if mygenome.gene(h['geneId'], geneNameH, geneSetH,
                                 geneInfoH).geneName in mygenome.TK
            ]

            txnLocusL_combined = []

            for strand in ['+', '-']:
                txnLocusL = [
                    mygenome.locus(
                        '%s:%s-%s%s' % (chrom, h['txnSta'], h['txnEnd'],
                                        strand),
                        h['geneId'])
                    for h in geneH_byChr if h['strand'] == strand
                ]
                txnLocusL_combined += mygenome.mergeLoci(txnLocusL)

            txnLocusL_combined.sort(key=byPosition)

            for txnLoc in txnLocusL_combined:

                exnLocusL = []

                for h in geneH_byChr:
                    if h['geneId'] not in txnLoc.id:
                        continue
                    for (exnSta, exnEnd) in h['exnList']:
                        exnLocusL.append(
                            mygenome.locus(
                                '%s:%s-%s%s' %
                                (chrom, exnSta, exnEnd, h['strand'])))

                exnLocusL.sort(key=byPosition)
                exnLocusL = mygenome.mergeLoci(exnLocusL)

                exnStaL = [str(exnLoc.chrSta) for exnLoc in exnLocusL]
                exnEndL = [str(exnLoc.chrEnd) for exnLoc in exnLocusL]

                outTextFile.write(
                    '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                    (txnLoc.id, txnLoc.chrom, txnLoc.strand, txnLoc.chrSta,
                     txnLoc.chrEnd, len(exnLocusL), ','.join(exnStaL),
                     ','.join(exnEndL)))

                outFaFile.write(
                    '>%s|%s|%s|%s|%s\n' %
                    (txnLoc.id, txnLoc.chrom, txnLoc.strand, txnLoc.chrSta,
                     txnLoc.chrEnd))

                lastIdx = len(exnLocusL) - 1

                for i, exnLoc in enumerate(exnLocusL):

                    # Copy so the widening and strand change do not
                    # alter the merged exon used for neighbour gaps.
                    exnLocCopy = copy.deepcopy(exnLoc)
                    exnLocCopy.strand = '+'

                    # '//' keeps the halving an integer floor division
                    # regardless of how '/' treats integers.
                    if i > 0:
                        gap = exnLoc.chrSta - exnLocusL[i - 1].chrEnd
                        exnLocCopy.chrSta -= min(intronSize, int(gap // 2))

                    if i < lastIdx:
                        gap = exnLocusL[i + 1].chrSta - exnLoc.chrEnd
                        exnLocCopy.chrEnd += min(intronSize, int(gap // 2))

                    outFaFile.write(exnLocCopy.nibFrag())

                outFaFile.write('\n')
Esempio n. 5
0
def genKgCompositeModel(outTextFileName, outFaFileName):
    """Write merged ("composite") transcript models for chr1-22, X, Y, M.

    Records from mygenome.loadKgByChr() are converted to loci and merged
    strand by strand with mygenome.mergeLoci(). For each merged locus,
    the exons of all records whose 'kgId' is contained in the locus id
    are collected and merged too.

    Args:
        outTextFileName: path of the tab-separated output. One line per
            merged locus: id, chromosome, strand, start, end, number of
            merged exons, comma-joined exon starts, comma-joined exon
            ends.
        outFaFileName: path of the sequence output. Each merged locus
            gets a '>id|chrom|strand|start|end' header line followed by
            the string returned by nibFrag() for the whole locus with
            its strand forced to '+' (presumably the genomic sequence --
            confirm against mygenome.locus).
    """
    kgH = mygenome.loadKgByChr()

    def byPosition(loc):
        # Order by start coordinate, ties broken by end coordinate.
        return (loc.chrSta, loc.chrEnd)

    # 'with' guarantees both files are closed even if an error
    # interrupts the run part-way through.
    with open(outTextFileName, 'w') as outTextFile, \
            open(outFaFileName, 'w') as outFaFile:

        # list(...) keeps the concatenation valid where range() is lazy.
        for chrNum in list(range(1, 23)) + ['X', 'Y', 'M']:

            chrom = 'chr%s' % chrNum
            chromKgL = kgH[chrom]

            txnLocusL_combined = []

            for strand in ['+', '-']:
                strandLocusL = [
                    mygenome.locus(
                        '%s:%s-%s%s' % (chrom, h['txnSta'], h['txnEnd'],
                                        strand),
                        h['kgId'])
                    for h in chromKgL if h['strand'] == strand
                ]
                txnLocusL_combined += mygenome.mergeLoci(strandLocusL)

            txnLocusL_combined.sort(key=byPosition)

            for txnLoc in txnLocusL_combined:

                exnLocusL = []

                for h in chromKgL:
                    if h['kgId'] not in txnLoc.id:
                        continue
                    for (exnSta, exnEnd) in h['exnList']:
                        exnLocusL.append(
                            mygenome.locus(
                                '%s:%s-%s%s' %
                                (chrom, exnSta, exnEnd, h['strand'])))

                exnLocusL.sort(key=byPosition)
                exnLocusL = mygenome.mergeLoci(exnLocusL)

                exnStaL = [str(exnLoc.chrSta) for exnLoc in exnLocusL]
                exnEndL = [str(exnLoc.chrEnd) for exnLoc in exnLocusL]

                outTextFile.write(
                    '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                    (txnLoc.id, txnLoc.chrom, txnLoc.strand, txnLoc.chrSta,
                     txnLoc.chrEnd, len(exnLocusL), ','.join(exnStaL),
                     ','.join(exnEndL)))

                outFaFile.write(
                    '>%s|%s|%s|%s|%s\n' %
                    (txnLoc.id, txnLoc.chrom, txnLoc.strand, txnLoc.chrSta,
                     txnLoc.chrEnd))

                # Work on a copy so the merged locus keeps its own strand
                # while the whole span is fetched on the positive strand.
                txnLocCopy = copy.deepcopy(txnLoc)
                txnLocCopy.strand = '+'

                outFaFile.write(txnLocCopy.nibFrag())
                outFaFile.write('\n')