Example #1
0
    feature_coords = load(open(feature_coords_fn))

print >> stderr, "Note: using FASTA file at %s" % fastafn

# NOTE(review): this handle is not closed anywhere in this section; confirm it
# is closed once the loop has finished.
f = open(gtffn)
# Set of coordinate tuples collected from the non-terminal features of the GTF.
junctions = set()
for row in f:  # for each row in the input GTF
    # Strip and split once; the columns are reused several times below.
    line = row.strip()
    # GTF files may contain blank lines and '#' comment/header lines. They
    # have no tab-separated columns, so indexing into them would raise
    # IndexError; skip them instead.
    if not line or line.startswith('#'):
        continue
    fields = line.split('\t')
    # Column 3 is the feature type: codon and CDS records are ignored.
    if fields[2] in ('start_codon', 'stop_codon', 'CDS'):
        continue
    if feature_coords_fn:  # if we have the resource to check whether this is a terminal exon or not
        strand = fields[6]  # column 7 of the row
        # Only the first value returned by parse_lines is used here.
        feature, _ = parse_lines([line], strand, get_transcripts)
        is_terminal_feature, up_exon, down_exon = terminal_exon(
            list(feature)[0], feature_coords)
        if is_terminal_feature:  # if this is terminal ignore it
            continue
        # now we know that this is not a terminal exon so there must be neighbouring exons
        up_exon_gtf = region_to_GTF(up_exon, feature_coords[up_exon],
                                    get_transcripts)
        down_exon_gtf = region_to_GTF(down_exon, feature_coords[down_exon],
                                      get_transcripts)

        if get_introns:
            # The raw row (not the stripped copy) is passed for the current
            # feature, exactly as before.
            up_exon_five, up_exon_three = GTFrow_to_5p3pcoords(
                up_exon_gtf, offset, use_chromnames)
            five, three = GTFrow_to_5p3pcoords(row, offset, use_chromnames)
            down_exon_five, down_exon_three = GTFrow_to_5p3pcoords(
                down_exon_gtf, offset, use_chromnames)
            # Recorded tuple: upstream exon's first coordinate, this feature's
            # second and first coordinates, downstream exon's second coordinate.
            junctions.add((up_exon_five, three, five, down_exon_three))
Example #2
0
# Output handles: the main result file plus its ".before" and ".after"
# companions. NOTE(review): none of them is closed in this section; confirm
# they are closed once writing is finished.
o = open(ofn, 'w')
o_before = open(ofn + ".before", 'w')
o_after = open(ofn + ".after", 'w')

print >> stderr, "Extending exonic regions by %s bases. Specify your own with option '-e' (see help for details)" % extend

# Collect exon identifiers from the second tab-separated column of exonfn;
# that column may hold several comma-separated identifiers.
exons = list()
with open(exonfn) as exon_file:  # closes the handle as soon as it is read
    for row in exon_file:
        if not row.strip():
            # A blank line (e.g. a trailing newline) has no second column and
            # would otherwise raise IndexError.
            continue
        exons.extend(row.strip().split('\t')[1].split(','))

# NOTE(review): if `load` is pickle.load, gexons_pic must only ever point at a
# trusted file, since unpickling can execute arbitrary code.
with open(gexons_pic) as gex_file:
    gex_coords = load(gex_file)
tabixfile = pysam.Tabixfile(tabixfn)
# Visit every collected exon identifier; those that terminal_exon() reports as
# terminal are skipped.
for exon in exons:
    # terminal_exon() also returns the keys used below to look up the
    # previous and next exon in gex_coords.
    is_terminal_exon, prev_exon, next_exon = terminal_exon(exon, gex_coords)
    if is_terminal_exon: continue

    # get the coordinates
    # (each stored entry is passed through augment_region() together with
    # `extend`, for the exon itself and for both of its neighbours)
    coords = augment_region(gex_coords[exon], extend)
    coords_before = augment_region(gex_coords[prev_exon], extend)
    coords_after = augment_region(gex_coords[next_exon], extend)

    # whether to use 'chr' or not
    # Drops the first three characters of each region; presumably these are a
    # leading 'chr' prefix. TODO confirm augment_region() always returns
    # 'chr'-prefixed regions.
    if not use_chrom_names:
        coords = coords[3:]
        coords_before = coords_before[3:]
        coords_after = coords_after[3:]

    # get the GTF rows that satisfy this
    rows = [
Example #3
0
# Three output handles: the main file and its ".before" / ".after" siblings.
# NOTE(review): they are not closed within this section; confirm that happens
# after the last write.
o = open(ofn, 'w')
o_before = open(ofn + ".before", 'w')
o_after = open(ofn + ".after", 'w')

print >> stderr, "Extending exonic regions by %s bases. Specify your own with option '-e' (see help for details)" % extend

# Read the exon identifiers: column 2 of each tab-separated row of exonfn,
# itself a comma-separated list.
exons = list()
with open(exonfn) as exon_file:  # the handle is released right after reading
    for row in exon_file:
        if not row.strip():
            # Skip blank lines, which have no second column (IndexError).
            continue
        exons.extend(row.strip().split('\t')[1].split(','))

# NOTE(review): if `load` is pickle.load, only trusted files may be passed as
# gexons_pic, since unpickling can run arbitrary code.
with open(gexons_pic) as gex_file:
    gex_coords = load(gex_file)
tabixfile = pysam.Tabixfile(tabixfn)
# For every non-terminal exon: derive the extended region of the exon and of
# both neighbours, then gather the "exon" records that the tabix index returns
# for the exon's own region.
for exon in exons:
    is_terminal_exon, prev_exon, next_exon = terminal_exon(exon, gex_coords)
    if is_terminal_exon:
        continue

    # Extended regions for the exon itself, then its previous and next exon.
    coords = augment_region(gex_coords[exon], extend)
    coords_before = augment_region(gex_coords[prev_exon], extend)
    coords_after = augment_region(gex_coords[next_exon], extend)

    # When chromosome names are not wanted, cut the first three characters off
    # each region.
    if not use_chrom_names:
        coords = coords[3:]
        coords_before = coords_before[3:]
        coords_after = coords_after[3:]

    # Keep only the fetched rows whose third tab-separated column is "exon".
    rows = []
    for row in tabixfile.fetch(region=coords):
        if row.strip().split('\t')[2] == "exon":
            rows.append(row)
Example #4
0
# Load the feature-coordinate lookup only when a path to it was supplied;
# otherwise feature_coords is left undefined.
if feature_coords_fn:
    # The context manager closes the handle instead of leaving it to the
    # garbage collector.
    # NOTE(review): if `load` is pickle.load, feature_coords_fn must be a
    # trusted file, since unpickling can execute arbitrary code.
    with open(feature_coords_fn) as coords_file:
        feature_coords = load(coords_file)

print >> stderr, "Note: using FASTA file at %s" % fastafn

# Set of coordinate tuples collected from the non-terminal features of the GTF:
# 4-tuples when get_introns is set, (five, three) pairs otherwise.
junctions = set()
# The context manager closes the GTF even when one of the helpers raises
# part-way through the file; `f` is left as a closed file object afterwards.
with open(gtffn) as f:
    for row in f:  # for each row in the input GTF
        # Strip and split once; the columns are reused several times below.
        line = row.strip()
        # GTF files may contain blank lines and '#' comment/header lines. They
        # have no tab-separated columns, so indexing into them would raise
        # IndexError; skip them instead.
        if not line or line.startswith('#'):
            continue
        fields = line.split('\t')
        # Column 3 is the feature type: codon and CDS records are ignored.
        if fields[2] in ('start_codon', 'stop_codon', 'CDS'):
            continue
        # Without the coordinate resource no row contributes a junction.
        if not feature_coords_fn:
            continue

        strand = fields[6]  # column 7 of the row
        # Only the first value returned by parse_lines is used here.
        feature, _ = parse_lines([line], strand, get_transcripts)
        is_terminal_feature, up_exon, down_exon = terminal_exon(
            list(feature)[0], feature_coords)
        if is_terminal_feature:  # if this is terminal ignore it
            continue

        # now we know that this is not a terminal exon so there must be neighbouring exons
        up_exon_gtf = region_to_GTF(up_exon, feature_coords[up_exon],
                                    get_transcripts)
        down_exon_gtf = region_to_GTF(down_exon, feature_coords[down_exon],
                                      get_transcripts)

        # The raw row (not the stripped copy) is passed for the current
        # feature, exactly as before.
        if get_introns:
            up_exon_five, up_exon_three = GTFrow_to_5p3pcoords(
                up_exon_gtf, offset, use_chromnames)
            five, three = GTFrow_to_5p3pcoords(row, offset, use_chromnames)
            down_exon_five, down_exon_three = GTFrow_to_5p3pcoords(
                down_exon_gtf, offset, use_chromnames)
            # Recorded tuple: upstream exon's first coordinate, this feature's
            # second and first coordinates, downstream exon's second coordinate.
            junctions.add((up_exon_five, three, five, down_exon_three))
        else:
            five, three = GTFrow_to_5p3pcoords(row, offset, use_chromnames)
            junctions.add((five, three))