Example #1
0
 def fetch(self, **kwargs):
     '''
     Fetch intervals in a given region.

     Keyword arguments:
         chrom -- chromosome name (required).
         start -- start coordinate handed to wWigIO.getIntervals
                  (defaults to 0).
         stop  -- stop coordinate handed to wWigIO.getIntervals
                  (defaults to 0).

     Returns the value produced by wWigIO.getIntervals for self.fname.

     Raises ValueError when 'chrom' is not supplied, or when
     wWigIO.getIntervals hands back a string instead of intervals.
     '''
     # Guard clause: nothing can be looked up without a chromosome.
     if 'chrom' not in kwargs:
         raise ValueError("Chromosome not provided.")

     wigs = wWigIO.getIntervals(self.fname, kwargs['chrom'],
                                kwargs.get('start', 0),
                                kwargs.get('stop', 0))
     # A string result is treated as a failure marker, not as data.
     if isinstance(wigs, basestring):
         raise ValueError("Couldn't get intervals.")
     return wigs
Example #2
0
 def fetch(self, **kwargs):
     '''
     Fetch intervals in a given region.

     Recognised keyword arguments are 'chrom' (required), 'start' and
     'stop' (both default to 0); they are forwarded, together with
     self.fname, to wWigIO.getIntervals.

     Returns the result of wWigIO.getIntervals.

     Raises ValueError if 'chrom' is missing or if wWigIO.getIntervals
     returns a string rather than intervals.
     '''
     # Fail early when no chromosome was named.
     if 'chrom' not in kwargs:
         raise ValueError("Chromosome not provided.")

     wigs = wWigIO.getIntervals(self.fname, kwargs['chrom'],
                                kwargs.get('start', 0),
                                kwargs.get('stop', 0))
     if isinstance(wigs, basestring):  # bad value
         raise ValueError("Couldn't get intervals.")
     return wigs
Example #3
0
# ------------------------------------
# Misc functions
# ------------------------------------

# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

if __name__ == "__main__":
    # Read the chromosome sizes and the chr1:10-200 intervals from the
    # bigWig file, then release it again.
    wWigIO.open('test.bw')
    chroms = wWigIO.getChromSize('test.bw')
    wigs = wWigIO.getIntervals('test.bw', 'chr1', 10, 200)
    wWigIO.close('test.bw')
    print(wigs)

    # Convert bigWig to wig.
    wWigIO.bigWigToWig('test.bw', 'test.wig')

    # Write one "<chrom>\t<size>" line per chromosome into test.sizes.
    with open('test.sizes', 'w') as fh:
        for chrom in chroms:
            fh.write(chrom + "\t" + str(chroms[chrom]) + "\n")

    # Convert wig back to bigWig using the sizes file written above.
    wWigIO.wigToBigWig('test.wig', 'test.sizes', 'test2.bw')
Example #4
0
def main(sim_fastq, U2_GTAG_5_file, U2_GTAG_3_file, phylop_vertebrates,
         phylop_primates, exon_scores):

    MEs = set([])
    wWigIO.open(phylop_vertebrates)
    wWigIO.open(phylop_primates)

    U2_GTAG_5 = PWM_to_dict(U2_GTAG_5_file)
    U2_GTAG_3 = PWM_to_dict(U2_GTAG_3_file)

    U2_GTAG_5_max_score = 0
    U2_GTAG_3_max_score = 0

    for index in range(13):
        U2_GTAG_5_max_score += max(U2_GTAG_5['A'][index],
                                   U2_GTAG_5['C'][index],
                                   U2_GTAG_5['T'][index],
                                   U2_GTAG_5['G'][index])

    for index in range(17):
        U2_GTAG_3_max_score += max(U2_GTAG_3['A'][index],
                                   U2_GTAG_3['C'][index],
                                   U2_GTAG_3['T'][index],
                                   U2_GTAG_3['G'][index])

    TOTAL_U2_max_score = U2_GTAG_5_max_score + U2_GTAG_3_max_score

    gencode_U2_scores = []
    gencode_mean_conservation_vertebrates = []
    gencode_mean_conservation_primates = []

    for row in csv.reader(open(exon_scores), delimiter=' '):

        chr, estart, eend, strand, U2_score, mean_conservation_vertebrates, mean_conservation_primates = row

        gencode_U2_scores.append(float(U2_score))
        gencode_mean_conservation_vertebrates.append(
            float(mean_conservation_vertebrates))
        gencode_mean_conservation_primates.append(
            float(mean_conservation_primates))

    for row in csv.reader(open(sim_fastq), delimiter='\t'):

        if row[0][0] == "@":

            SJ, ME_seq, estart, eend, total_coverage, n = row[0].split("_")

            len_ME = len(ME_seq)

            SJ = SJ[1:]
            SJ_chr, SJ_istart, SJ_iend = re.findall(r"[\w']+", SJ)

            SJ_len = int(SJ_iend) - int(SJ_istart)
            Kmer = SJ_len - (len_ME + 1)
            P_ME = 1 - (1 - (float(1) / float(4**len_ME + 4)))**Kmer

            strand = "+"

            if "-" in SJ:
                strand = "-"

            estart = int(estart)
            eend = int(eend)

            MEs.add((SJ_chr, strand, estart, eend, P_ME))

    for m in MEs:

        chr, strand, estart, eend, P_ME = m

        estart, eend = sorted([estart, eend])

        E5 = str(Genome[chr][estart - 14:estart + 3]).upper()
        E3 = str(Genome[chr][eend - 3:eend + 10]).upper()

        if strand == "-":

            E5 = str(Genome[chr][eend - 3:eend +
                                 14].reverse_complement()).upper()
            E3 = str(Genome[chr][estart - 10:estart +
                                 3].reverse_complement()).upper()

        U2_score = 0

        i = 0

        for N in E5:
            if N != "N":
                U2_score += U2_GTAG_3[N][i]
                i += 1

        i = 0

        for N in E3:
            if N != "N":
                U2_score += U2_GTAG_5[N][i]
                i += 1

        U2_score = percent(U2_score, TOTAL_U2_max_score)

        conservation_vertebrates = wWigIO.getIntervals(phylop_vertebrates, chr,
                                                       estart - 2, eend + 2)
        conservation_primates = wWigIO.getIntervals(phylop_primates, chr,
                                                    estart - 2, eend + 2)

        mean_conservation_vertebrates = 0
        mean_conservation_primates = 0

        for i in conservation_vertebrates:

            mean_conservation_vertebrates += i[2]

        try:

            mean_conservation_vertebrates = mean_conservation_vertebrates / len(
                conservation_vertebrates)

        except ZeroDivisionError:
            pass

        for i in conservation_primates:

            mean_conservation_primates += i[2]

        try:

            mean_conservation_primates = mean_conservation_primates / len(
                conservation_primates)

        except ZeroDivisionError:
            pass

        ME_percentil_U2_score = stats.percentileofscore(
            gencode_U2_scores, U2_score)
        ME_percentil_mean_conservation_vertebrates = stats.percentileofscore(
            gencode_mean_conservation_vertebrates, mean_conservation_primates)
        ME_percentil_mean_conservation_primates = stats.percentileofscore(
            gencode_mean_conservation_primates, mean_conservation_vertebrates)

        overall_score = P_ME * (1 - ME_percentil_U2_score / 100) * (
            1 - ME_percentil_mean_conservation_vertebrates / 100)

        if ME_percentil_mean_conservation_primates > ME_percentil_mean_conservation_vertebrates:
            overall_score = P_ME * (1 - ME_percentil_U2_score / 100) * (
                1 - ME_percentil_mean_conservation_primates / 100)

        #print chr, estart, eend, strand, U2_score, mean_conservation_vertebrates, mean_conservation_primates
        print chr, estart, eend, strand, U2_score, ME_percentil_U2_score, mean_conservation_vertebrates, ME_percentil_mean_conservation_vertebrates, mean_conservation_primates, ME_percentil_mean_conservation_primates, P_ME, overall_score
def main(gencode_bed, U2_GTAG_5_file, U2_GTAG_3_file, phylop_vertebrates,
         phylop_primates):

    wWigIO.open(phylop_vertebrates)
    wWigIO.open(phylop_primates)

    U2_GTAG_5 = PWM_to_dict(U2_GTAG_5_file)
    U2_GTAG_3 = PWM_to_dict(U2_GTAG_3_file)

    U2_GTAG_5_max_score = 0
    U2_GTAG_3_max_score = 0

    for index in range(13):
        U2_GTAG_5_max_score += max(U2_GTAG_5['A'][index],
                                   U2_GTAG_5['C'][index],
                                   U2_GTAG_5['T'][index],
                                   U2_GTAG_5['G'][index])

    for index in range(17):
        U2_GTAG_3_max_score += max(U2_GTAG_3['A'][index],
                                   U2_GTAG_3['C'][index],
                                   U2_GTAG_3['T'][index],
                                   U2_GTAG_3['G'][index])

    TOTAL_U2_max_score = U2_GTAG_5_max_score + U2_GTAG_3_max_score

    exons = set([])

    for row in csv.reader(open(gencode_bed), delimiter='\t'):

        csv.field_size_limit(1000000000)

        qstarts = map(int, row[11].strip(",").split(","))
        blocksizes = map(int, row[10].strip(",").split(","))

        start = int(row[1])
        strand = row[5]
        bn = int(row[9])
        chr = row[0]

        for q1, b in zip(qstarts[1:-1], blocksizes[1:-1]):
            estart = start + q1
            eend = start + q1 + b

            E5 = str(Genome[chr][estart - 14:estart + 3]).upper()
            E3 = str(Genome[chr][eend - 3:eend + 10]).upper()

            if strand == "-":

                E5 = str(Genome[chr][eend - 3:eend +
                                     14].reverse_complement()).upper()
                E3 = str(Genome[chr][estart - 10:estart +
                                     3].reverse_complement()).upper()

            U2_score = 0

            i = 0

            for N in E5:
                U2_score += U2_GTAG_3[N][i]
                i += 1

            i = 0

            for N in E3:
                U2_score += U2_GTAG_5[N][i]
                i += 1

            U2_score = percent(U2_score, TOTAL_U2_max_score)

            if E5[-5:-3] == "AG" and E3[3:5] == "GT":

                exons.add((chr, estart, eend, strand, U2_score))

            # if " ".join([chr, estart, eend]) == "chr17 26597935 26598725":
            # 	print

    for e in exons:

        chr, estart, eend, strand, U2_score = e

        conservation_vertebrates = wWigIO.getIntervals(phylop_vertebrates, chr,
                                                       estart - 2, eend + 2)
        conservation_primates = wWigIO.getIntervals(phylop_primates, chr,
                                                    estart - 2, eend + 2)

        mean_conservation_vertebrates = 0
        mean_conservation_primates = 0

        for i in conservation_vertebrates:

            mean_conservation_vertebrates += i[2]

        try:

            mean_conservation_vertebrates = mean_conservation_vertebrates / len(
                conservation_vertebrates)

        except ZeroDivisionError:
            pass

        for i in conservation_primates:

            mean_conservation_primates += i[2]

        try:

            mean_conservation_primates = mean_conservation_primates / len(
                conservation_primates)

        except ZeroDivisionError:
            pass

        print chr, estart, eend, strand, U2_score, mean_conservation_vertebrates, mean_conservation_primates
Example #6
0
    def __NDR_bin_detect(self):
        """Inspect all covered bins and report those below ``bin_idx_min``.

        Walks the keys of ``self.l_umt_met`` in sorted order and calls
        ``self.__bin_info(idx)`` for each (presumably this sets
        ``self.bin_begin`` / ``self.bin_endin`` -- confirm).  For every
        ``idx < self.bin_idx_min`` it:

        * computes a chi-square statistic of the bin's two counts against
          ``self.np_RATIO * total`` and turns it into a signed
          ``-log10(1 - cdf)`` value (1 degree of freedom);
        * repeats that on counts collected from ``self.tb_file.query`` over
          a window of ``10 * self.step_len`` on each side of the bin centre;
        * runs three unequal-variance t-tests (``equal_var=False``) of the
          bin's ratios against the window ratios and against the ratios
          found near each bin edge, again as signed ``-log10`` values;
        * averages the third element of the intervals returned by
          ``wWigIO.getIntervals`` for the bin;
        * prints one tab-separated line and deletes ``idx`` from
          ``self.l_umt_met`` and ``self.l_rat_umt_met``.

        ``in_bw`` is not defined in this method; it is looked up as a
        global name.
        """
        # sorted() returns a new list, so deleting keys inside the loop is safe.
        for idx in sorted(self.l_umt_met):
            self.__bin_info(idx)
            # Translation of the string below: "For bins like this that may
            # be significant and will not receive any further reads".
            """对这种可能显著并且不会继续落 reads 的 bin"""
            if idx < self.bin_idx_min:
                # Chi-square of observed counts against the expected split.
                np_obs = self.l_umt_met[idx]
                np_exp = self.np_RATIO * np.sum(np_obs)
                chisquare = np.sum((np_obs - np_exp)**2 / np_exp)
                pval = -1 * np.log10(1 - scipy.stats.chi2.cdf(chisquare, 1))
                # The sign records the direction: negative when the second
                # count is below its expectation.
                if np_obs[1] < np_exp[1]:
                    pval = -1 * pval

                # Extended window: 10 steps on either side of the bin centre.
                pos_center = int((self.bin_begin + self.bin_endin) / 2)
                bin_up = pos_center - self.step_len * 10
                bin_down = pos_center + self.step_len * 10
                #                print bin_up, bin_down
                np_val_ext = np.zeros(2)
                l_rat_umt_met_ext = []
                l_rat_umt_met_sur1 = []
                l_rat_umt_met_sur2 = []

                # NOTE(review): the bare except below hides every error raised
                # by the query or the parsing, leaving the lists empty.
                try:
                    record = self.tb_file.query(self.chrom, bin_up, bin_down)
                    for rec in record:
                        pos = (int(rec[1]) + int(rec[2])) / 2
                        total = int(rec[3]) + int(rec[4])
                        # Only records with enough depth contribute.
                        if total >= self.depth:
                            np_val_ext[0] += int(rec[3])
                            np_val_ext[1] += int(rec[4])
                            # NOTE(review): with Python 2 and no
                            # `from __future__ import division` this is
                            # integer division -- confirm.
                            ratio = int(rec[4]) / (int(rec[3]) + int(rec[4]))
                            l_rat_umt_met_ext.append(ratio)
                            # Records within 40 of a point 40 before the bin
                            # start ...
                            if abs(pos - (self.bin_begin - 40)) < 40:
                                l_rat_umt_met_sur1.append(ratio)
                            # ... and within 40 of a point 40 past the bin end.
                            if abs(pos - (self.bin_endin + 40)) < 40:
                                l_rat_umt_met_sur2.append(ratio)
                except:
                    pass

                np_obs_ext = np_val_ext
                np_exp_ext = self.np_RATIO * np.sum(np_obs_ext)
                # Mean ratios of the bin, the window and the two edge regions.
                val_reg = np.array(self.l_rat_umt_met[idx]).mean()
                val_ext = np.array(l_rat_umt_met_ext).mean()
                val_sur1 = np.array(l_rat_umt_met_sur1).mean()
                val_sur2 = np.array(l_rat_umt_met_sur2).mean()

                # Same signed chi-square value, now for the extended window.
                chisquare_ext = np.sum(
                    (np_obs_ext - np_exp_ext)**2 / np_exp_ext)
                pval_ext = -1 * np.log10(
                    1 - scipy.stats.chi2.cdf(chisquare_ext, 1))
                if np_obs_ext[1] < np_exp_ext[1]:
                    pval_ext = -1 * pval_ext

                # Bin ratios versus extended-window ratios.
                pval_ttest = -1 * np.log10(
                    scipy.stats.ttest_ind(self.l_rat_umt_met[idx],
                                          l_rat_umt_met_ext,
                                          equal_var=False)[1])
                if val_reg < val_ext:
                    pval_ttest = -1 * pval_ttest

                # Bin ratios versus the region before the bin start.
                pval_ttest2l = -1 * np.log10(
                    scipy.stats.ttest_ind(self.l_rat_umt_met[idx],
                                          l_rat_umt_met_sur1,
                                          equal_var=False)[1])
                if val_reg < val_sur1:
                    pval_ttest2l = -1 * pval_ttest2l

                # Bin ratios versus the region after the bin end.
                pval_ttest2r = -1 * np.log10(
                    scipy.stats.ttest_ind(self.l_rat_umt_met[idx],
                                          l_rat_umt_met_sur2,
                                          equal_var=False)[1])
                if val_reg < val_sur2:
                    pval_ttest2r = -1 * pval_ttest2r

                # Third element of every bigWig interval overlapping the bin.
                np_bw = np.array([
                    f[2] for f in wWigIO.getIntervals(
                        in_bw, self.chrom, self.bin_begin, self.bin_endin)
                ])
                #                print self.bin_begin, self.bin_endin, np_bw
                mean_bw = np_bw.mean()

                print "%s\t%d\t%d\t%1.2f\t%1.2f\t%1.2f\t%1.2f\t%1.2f\t%1.2f\t%d,%d\t%d,%d\t%1.2f,%1.2f\t%1.2f,%1.2f\t%1.2f,%1.2f,%1.2f,%1.2f" % (
                    self.chrom, self.bin_begin, self.bin_endin, mean_bw, pval,
                    pval_ext, pval_ttest, pval_ttest2l, pval_ttest2r,
                    self.l_umt_met[idx][0], self.l_umt_met[idx][1],
                    np_val_ext[0], np_val_ext[1], np_exp[0], np_exp[1],
                    np_exp_ext[0], np_exp_ext[1], val_reg, val_ext, val_sur1,
                    val_sur2)
                #                print "%s\t%d\t%d\t%1.2f\t%s\t%s" % (self.chrom, self.bin_begin, self.bin_endin, pval_ttest, val_reg, val_ext)
                # The bin has been reported; drop its accumulated data.
                del self.l_umt_met[idx]
                del self.l_rat_umt_met[idx]
Example #7
0
# ------------------------------------
# Misc functions
# ------------------------------------

# ------------------------------------
# Classes
# ------------------------------------

# ------------------------------------
# Main
# ------------------------------------

if __name__ == "__main__":
    # Open the bigWig, grab its chromosome sizes plus the intervals of
    # chr1:10-200, and close it again before printing.
    wWigIO.open('test.bw')
    chroms = wWigIO.getChromSize('test.bw')
    wigs = wWigIO.getIntervals('test.bw', 'chr1', 10, 200)
    wWigIO.close('test.bw')
    print(wigs)

    # bigWig -> wig
    wWigIO.bigWigToWig('test.bw', 'test.wig')

    # Store the chromosome sizes as tab-separated "<chrom>\t<size>" lines.
    with open('test.sizes', 'w') as fh:
        for chrom in chroms:
            fh.write(chrom + "\t" + str(chroms[chrom]) + "\n")

    # wig -> bigWig, using the sizes file written just above.
    wWigIO.wigToBigWig('test.wig', 'test.sizes', 'test2.bw')
Example #8
0
def main(sim_fastq, U2_GTAG_5_file, U2_GTAG_3_file, phylop_vertebrates,
         phylop_primates):

    MEs = set([])
    wWigIO.open(phylop_vertebrates)
    wWigIO.open(phylop_primates)

    U2_GTAG_5 = PWM_to_dict(U2_GTAG_5_file)
    U2_GTAG_3 = PWM_to_dict(U2_GTAG_3_file)

    U2_GTAG_5_max_score = 0
    U2_GTAG_3_max_score = 0

    for index in range(13):
        U2_GTAG_5_max_score += max(U2_GTAG_5['A'][index],
                                   U2_GTAG_5['C'][index],
                                   U2_GTAG_5['T'][index],
                                   U2_GTAG_5['G'][index])

    for index in range(17):
        U2_GTAG_3_max_score += max(U2_GTAG_3['A'][index],
                                   U2_GTAG_3['C'][index],
                                   U2_GTAG_3['T'][index],
                                   U2_GTAG_3['G'][index])

    TOTAL_U2_max_score = U2_GTAG_5_max_score + U2_GTAG_3_max_score

    for row in csv.reader(open(sim_fastq), delimiter='\t'):

        chr, estart, eend, exon, exon_len, strand = row

        estart = int(estart)
        eend = int(eend)

        # 	if row[0][0]=="@":

        # 		SJ, ME_seq, estart, eend, total_coverage, n = row[0].split("_")

        # 		len_ME = len(ME_seq)

        # 		SJ = SJ[1:]
        # 		SJ_chr, SJ_istart, SJ_iend = re.findall(r"[\w']+", SJ)

        # 		SJ_len = int(SJ_iend) - int(SJ_istart)
        # 		Kmer = SJ_len - (len_ME+1)
        # 		P_ME = 1 - ( 1 - (float(1)/float(4**len_ME+4)))**Kmer

        # 		strand = "+"

        # 		if "-" in SJ:
        # 			strand = "-"

        # 		estart = int(estart)
        # 		eend = int(eend)

        # 		MEs.add((SJ_chr, strand, estart, eend, P_ME))

        # for m in MEs:

        # 	chr, strand, estart, eend, P_ME = m

        # 	estart, eend = sorted([estart, eend])

        E5 = str(Genome[chr][estart - 14:estart + 3]).upper()
        E3 = str(Genome[chr][eend - 3:eend + 10]).upper()

        if strand == "-":

            E5 = str(Genome[chr][eend - 3:eend +
                                 14].reverse_complement()).upper()
            E3 = str(Genome[chr][estart - 10:estart +
                                 3].reverse_complement()).upper()

        E5 = E5[:-5] + "AG" + E5[-3:]
        E3 = E3[:3] + "GT" + E3[5:]

        U2_score = 0
        ME5_U2_score = 0
        ME3_U2_score = 0

        i = 0

        for N in E5:
            if N != "N":
                U2_score += U2_GTAG_3[N][i]
                ME5_U2_score += U2_GTAG_3[N][i]
                i += 1

        i = 0

        for N in E3:
            if N != "N":
                U2_score += U2_GTAG_5[N][i]
                ME3_U2_score += U2_GTAG_5[N][i]
                i += 1

        ME3_U2_score = percent(ME3_U2_score, U2_GTAG_5_max_score)
        ME5_U2_score = percent(ME5_U2_score, U2_GTAG_3_max_score)

        U2_score = percent(U2_score, TOTAL_U2_max_score)

        conservation_vertebrates = wWigIO.getIntervals(phylop_vertebrates, chr,
                                                       estart - 2, eend + 2)
        conservation_primates = wWigIO.getIntervals(phylop_primates, chr,
                                                    estart - 2, eend + 2)

        mean_conservation_vertebrates = 0
        mean_conservation_primates = 0

        for i in conservation_vertebrates:

            mean_conservation_vertebrates += i[2]

        try:

            mean_conservation_vertebrates = mean_conservation_vertebrates / len(
                conservation_vertebrates)

        except ZeroDivisionError:
            pass

        for i in conservation_primates:

            mean_conservation_primates += i[2]

        try:

            mean_conservation_primates = mean_conservation_primates / len(
                conservation_primates)

        except ZeroDivisionError:
            pass

        #print chr, estart, eend, strand, U2_score, mean_conservation_vertebrates, mean_conservation_primates
        print chr, estart, eend, strand, U2_score, ME5_U2_score, ME3_U2_score, mean_conservation_vertebrates, mean_conservation_primates
Example #9
0
def main(sim_fastq,  U2_GTAG_5_file, U2_GTAG_3_file, phylop_vertebrates, phylop_primates):

	MEs = set([])
	wWigIO.open(phylop_vertebrates)
	wWigIO.open(phylop_primates)

	U2_GTAG_5 = PWM_to_dict(U2_GTAG_5_file)
	U2_GTAG_3 = PWM_to_dict(U2_GTAG_3_file)

	U2_GTAG_5_max_score = 0
	U2_GTAG_3_max_score = 0

	for index in range(13):
		U2_GTAG_5_max_score += max(U2_GTAG_5['A'][index], U2_GTAG_5['C'][index], U2_GTAG_5['T'][index], U2_GTAG_5['G'][index])

	for index in range(17):
		U2_GTAG_3_max_score += max(U2_GTAG_3['A'][index], U2_GTAG_3['C'][index], U2_GTAG_3['T'][index], U2_GTAG_3['G'][index])
	
	TOTAL_U2_max_score = U2_GTAG_5_max_score + U2_GTAG_3_max_score


	for row in csv.reader(open(sim_fastq), delimiter = '\t'):

		chr, estart, eend, exon, exon_len, strand = row

		estart = int(estart)
		eend = int(eend)



	# 	if row[0][0]=="@":

	# 		SJ, ME_seq, estart, eend, total_coverage, n = row[0].split("_")

	# 		len_ME = len(ME_seq)

	# 		SJ = SJ[1:]
	# 		SJ_chr, SJ_istart, SJ_iend = re.findall(r"[\w']+", SJ)


	# 		SJ_len = int(SJ_iend) - int(SJ_istart)
	# 		Kmer = SJ_len - (len_ME+1)
	# 		P_ME = 1 - ( 1 - (float(1)/float(4**len_ME+4)))**Kmer	

	# 		strand = "+"

	# 		if "-" in SJ:
	# 			strand = "-"

	# 		estart = int(estart)
	# 		eend = int(eend)

	# 		MEs.add((SJ_chr, strand, estart, eend, P_ME))


	# for m in MEs:

	# 	chr, strand, estart, eend, P_ME = m

	# 	estart, eend = sorted([estart, eend])

		E5 = str(Genome[chr][estart-14:estart+3]).upper()
		E3 = str(Genome[chr][eend-3:eend+10]).upper()


		if strand == "-":

			E5 = str(Genome[chr][eend-3:eend+14].reverse_complement()).upper()
			E3 = str(Genome[chr][estart-10:estart+3].reverse_complement()).upper()

		E5 = E5[:-5] + "AG" + E5[-3:]
		E3 = E3[:3] + "GT" + E3[5:]


		U2_score = 0
		ME5_U2_score = 0
		ME3_U2_score = 0	

		i = 0


		for N in E5:
			if N!="N":
				U2_score += U2_GTAG_3[N][i]
				ME5_U2_score += U2_GTAG_3[N][i]
				i += 1

		i = 0

		for N in E3:
			if N!="N":
				U2_score += U2_GTAG_5[N][i]
				ME3_U2_score += U2_GTAG_5[N][i]
				i += 1

		ME3_U2_score = percent(ME3_U2_score, U2_GTAG_5_max_score)
		ME5_U2_score = percent(ME5_U2_score, U2_GTAG_3_max_score)

		U2_score = percent(U2_score, TOTAL_U2_max_score)

		conservation_vertebrates = wWigIO.getIntervals(phylop_vertebrates, chr, estart-2, eend+2)
		conservation_primates = wWigIO.getIntervals(phylop_primates, chr, estart-2, eend+2)

		mean_conservation_vertebrates = 0
		mean_conservation_primates = 0

		for i in conservation_vertebrates:

			mean_conservation_vertebrates += i[2]

		try:

			mean_conservation_vertebrates = mean_conservation_vertebrates/len(conservation_vertebrates)

		except ZeroDivisionError:
			pass

		
		for i in conservation_primates:

			mean_conservation_primates += i[2]

		try:

			mean_conservation_primates = mean_conservation_primates/len(conservation_primates)

		except ZeroDivisionError:
			pass

		#print chr, estart, eend, strand, U2_score, mean_conservation_vertebrates, mean_conservation_primates
		print chr, estart, eend, strand, U2_score, ME5_U2_score, ME3_U2_score,  mean_conservation_vertebrates, mean_conservation_primates
Example #10
0
def main(gencode_bed, U2_GTAG_5_file, U2_GTAG_3_file, phylop_vertebrates, phylop_primates):

	wWigIO.open(phylop_vertebrates)
	wWigIO.open(phylop_primates)

	U2_GTAG_5 = PWM_to_dict(U2_GTAG_5_file)
	U2_GTAG_3 = PWM_to_dict(U2_GTAG_3_file)

	U2_GTAG_5_max_score = 0
	U2_GTAG_3_max_score = 0

	for index in range(13):
		U2_GTAG_5_max_score += max(U2_GTAG_5['A'][index], U2_GTAG_5['C'][index], U2_GTAG_5['T'][index], U2_GTAG_5['G'][index])

	for index in range(17):
		U2_GTAG_3_max_score += max(U2_GTAG_3['A'][index], U2_GTAG_3['C'][index], U2_GTAG_3['T'][index], U2_GTAG_3['G'][index])
	
	TOTAL_U2_max_score = U2_GTAG_5_max_score + U2_GTAG_3_max_score

	exons = set([])

	for row in csv.reader(open(gencode_bed), delimiter = '\t'):
		
		csv.field_size_limit(1000000000)

		qstarts = map (int, row[11].strip(",").split(","))                      
		blocksizes = map(int, row[10].strip(",").split(","))

		start = int(row[1])
		strand = row[5]
		bn = int(row[9])
		chr = row[0]

		

		for q1, b in zip(qstarts[1:-1], blocksizes[1:-1]):
			estart = start + q1
			eend = start + q1 + b


			E5 = str(Genome[chr][estart-14:estart+3]).upper()
			E3 = str(Genome[chr][eend-3:eend+10]).upper()


			if strand == "-":

				E5 = str(Genome[chr][eend-3:eend+14].reverse_complement()).upper()
				E3 = str(Genome[chr][estart-10:estart+3].reverse_complement()).upper()


			U2_score = 0

			i = 0

			for N in E5:
				U2_score += U2_GTAG_3[N][i]
				i += 1

			i = 0

			for N in E3:
				U2_score += U2_GTAG_5[N][i]
				i += 1

			U2_score = percent(U2_score, TOTAL_U2_max_score)



			if E5[-5:-3]=="AG" and E3[3:5] == "GT":



				exons.add((chr, estart, eend, strand, U2_score))

			# if " ".join([chr, estart, eend]) == "chr17 26597935 26598725":
			# 	print 


	for e in exons:



		chr, estart, eend, strand, U2_score = e

		conservation_vertebrates = wWigIO.getIntervals(phylop_vertebrates, chr, estart-2, eend+2)
		conservation_primates = wWigIO.getIntervals(phylop_primates, chr, estart-2, eend+2)

		mean_conservation_vertebrates = 0
		mean_conservation_primates = 0

		for i in conservation_vertebrates:

			mean_conservation_vertebrates += i[2]

		try:

			mean_conservation_vertebrates = mean_conservation_vertebrates/len(conservation_vertebrates)

		except ZeroDivisionError:
			pass

		
		for i in conservation_primates:

			mean_conservation_primates += i[2]

		try:

			mean_conservation_primates = mean_conservation_primates/len(conservation_primates)

		except ZeroDivisionError:
			pass

		print chr, estart, eend, strand, U2_score, mean_conservation_vertebrates, mean_conservation_primates