def junction_exon_triples(chrom, exon_start_stop, transcripts, strand):
    """Tabulate (exon, direction, junction) triples for adjacent exon pairs.

    Parameters
    ----------
    chrom : str
        Chromosome name, passed through to ``stringify_location``.
    exon_start_stop : mapping
        Indexed by exon identifier; each value unpacks to ``(start, stop)``.
    transcripts : iterable
        Yields ``(transcript, exons)`` pairs. Only ``exons`` is used; it
        must support slicing so consecutive exons can be paired.
    strand : str
        ``'-'`` swaps which exon of a pair is labelled upstream/downstream;
        any other value is treated as the forward orientation.

    Returns
    -------
    pandas.DataFrame
        Columns ``exon``, ``direction`` and ``junction``, with duplicate
        rows removed.
    """
    from outrigger.index.events import stringify_location

    # The direction labels depend only on the strand, so pick them once.
    if strand == '-':
        first_direction, second_direction = 'downstream', 'upstream'
    else:
        first_direction, second_direction = 'upstream', 'downstream'

    rows = []
    for _, exons in transcripts:
        for first_exon, second_exon in zip(exons, exons[1:]):
            first_start, first_stop = exon_start_stop[first_exon]
            second_start, second_stop = exon_start_stop[second_exon]

            first_location = stringify_location(
                chrom, first_start, first_stop, strand, 'exon')
            second_location = stringify_location(
                chrom, second_start, second_stop, strand, 'exon')

            # The junction spans the gap between the two exons; the same
            # coordinates are used regardless of strand.
            junction_location = stringify_location(
                chrom, first_stop + 1, second_start - 1, strand, 'junction')

            rows.append(
                [first_location, first_direction, junction_location])
            rows.append(
                [second_location, second_direction, junction_location])

    triples = pd.DataFrame(rows, columns=['exon', 'direction', 'junction'])
    return triples.drop_duplicates()
def graph_items(exon_start_stop, transcripts, chrom, strand):
    """Build an exon/junction adjacency graph and its integer-id lookup.

    Every pair of consecutive exons in every transcript contributes two
    exon locations and the junction location between them. Each distinct
    location string is assigned an integer id in order of first
    appearance, and edges between exon ids and junction ids are stored in
    the graph under the ``upstream`` / ``downstream`` relations.

    Parameters
    ----------
    exon_start_stop : mapping
        Indexed by exon identifier; each value unpacks to ``(start, stop)``.
    transcripts : iterable
        Yields ``(transcript, exons)`` pairs. Only ``exons`` is used; it
        must support slicing so consecutive exons can be paired.
    chrom : str
        Chromosome name, passed through to ``stringify_location``.
    strand : str
        ``'-'`` swaps which exon of a pair is labelled upstream/downstream.

    Returns
    -------
    graph
        The object returned by ``connect(":memory:", ...)`` with the edges
        stored (presumably a graphlite graph -- confirm against the
        file-level imports).
    items : tuple of str
        Location strings; the position of a string in this tuple is the
        integer id used for it in ``graph``.
    """
    from outrigger.index.events import stringify_location, opposite

    graph = connect(":memory:", graphs=['upstream', 'downstream'])

    items = []
    # location string -> position in ``items``. Kept next to the list so
    # that id lookups are dictionary hits instead of linear
    # ``in`` / ``list.index`` scans repeated for every exon pair.
    item_index = {}
    triples = set()

    def register(location):
        """Return the id of ``location``, assigning the next free id if new."""
        if location not in item_index:
            item_index[location] = len(items)
            items.append(location)
        return item_index[location]

    # The direction labels depend only on the strand, so pick them once.
    if strand == '-':
        exon1_direction, exon2_direction = 'downstream', 'upstream'
    else:
        exon1_direction, exon2_direction = 'upstream', 'downstream'

    for _, exons in transcripts:
        for exon1, exon2 in zip(exons, exons[1:]):
            start1, stop1 = exon_start_stop[exon1]
            start2, stop2 = exon_start_stop[exon2]
            exon1_location = stringify_location(chrom, start1, stop1,
                                                strand, 'exon')
            exon2_location = stringify_location(chrom, start2, stop2,
                                                strand, 'exon')

            # The junction spans the gap between the two exons.
            junction_location = stringify_location(
                chrom, stop1 + 1, start2 - 1, strand, 'junction')

            # Registration order (exon1, exon2, junction) fixes the ids.
            register(exon1_location)
            register(exon2_location)
            junction_i = register(junction_location)

            exon_triples = (
                (exon1_location, exon1_direction, junction_location),
                (exon2_location, exon2_direction, junction_location),
            )

            with graph.transaction() as tr:
                for exon_triple in exon_triples:
                    if exon_triple in triples:
                        # This edge pair has already been stored.
                        continue
                    triples.add(exon_triple)

                    exon, direction, _ = exon_triple
                    exon_i = item_index[exon]

                    # Store the edge in both orientations so the graph
                    # can be walked from either the exon or the junction.
                    tr.store(getattr(V(exon_i), direction)(junction_i))
                    tr.store(
                        getattr(V(junction_i), opposite(direction))(exon_i))

    return graph, tuple(items)
def graph_items(exon_start_stop, transcripts, chrom, strand):
    """Create the exon/junction graph plus the tuple that decodes its ids.

    For each pair of consecutive exons in each transcript, the two exon
    locations and the junction location between them are given integer
    ids (assigned in order of first appearance) and linked in the graph
    through the ``upstream`` / ``downstream`` relations.

    Parameters
    ----------
    exon_start_stop : mapping
        Indexed by exon identifier; each value unpacks to ``(start, stop)``.
    transcripts : iterable
        Yields ``(transcript, exons)`` pairs. Only ``exons`` is used; it
        must support slicing so consecutive exons can be paired.
    chrom : str
        Chromosome name, passed through to ``stringify_location``.
    strand : str
        ``'-'`` swaps which exon of a pair is labelled upstream/downstream.

    Returns
    -------
    graph
        The object returned by ``connect(":memory:", ...)`` after all
        edges are stored (presumably a graphlite graph -- confirm against
        the file-level imports).
    items : tuple of str
        Location strings; ``items[i]`` is the location whose id in
        ``graph`` is ``i``.
    """
    from outrigger.index.events import stringify_location, opposite

    graph = connect(":memory:", graphs=['upstream', 'downstream'])

    items = []
    # Maps each location string to its position in ``items`` so that id
    # lookups do not require scanning the whole list for every exon pair.
    item_ids = {}
    triples = set()

    minus_strand = strand == '-'

    for _, exons in transcripts:
        for exon1, exon2 in zip(exons, exons[1:]):
            start1, stop1 = exon_start_stop[exon1]
            start2, stop2 = exon_start_stop[exon2]
            exon1_location = stringify_location(chrom, start1, stop1,
                                                strand, 'exon')
            exon2_location = stringify_location(chrom, start2, stop2,
                                                strand, 'exon')

            # The junction spans the gap between the two exons.
            start = stop1 + 1
            stop = start2 - 1
            junction_location = stringify_location(chrom, start, stop,
                                                   strand, 'junction')

            # Assign ids in the order exon1, exon2, junction; locations
            # seen before keep the id they were given first.
            for location in (exon1_location, exon2_location,
                             junction_location):
                if location not in item_ids:
                    item_ids[location] = len(items)
                    items.append(location)
            junction_i = item_ids[junction_location]

            if minus_strand:
                exon1_triple = exon1_location, 'downstream', junction_location
                exon2_triple = exon2_location, 'upstream', junction_location
            else:
                exon1_triple = exon1_location, 'upstream', junction_location
                exon2_triple = exon2_location, 'downstream', junction_location

            with graph.transaction() as tr:
                for exon_triple in (exon1_triple, exon2_triple):
                    # Each (exon, direction, junction) is stored only once.
                    if exon_triple not in triples:
                        triples.add(exon_triple)

                        exon, direction, junction = exon_triple
                        exon_i = item_ids[exon]

                        # Forward edge and its mirror image.
                        tr.store(getattr(V(exon_i), direction)(junction_i))
                        tr.store(getattr(V(junction_i),
                                         opposite(direction))(exon_i))

    items = tuple(items)
    return graph, items
def test_stringify_location(chrom, strand, region):
    """Check ``stringify_location`` output with and without a region prefix.

    ``chrom``, ``strand`` and ``region`` are supplied by the caller
    (presumably pytest fixtures -- confirm against the test module).
    When ``region`` is ``None`` the expected string has no region prefix.
    """
    from outrigger.index.events import stringify_location

    start, stop = 100, 200
    observed = stringify_location(chrom, start, stop, strand, region)

    if region is not None:
        expected = '{0}:{1}:{2}-{3}:{4}'.format(
            region, chrom, start, stop, strand)
    else:
        expected = '{0}:{1}-{2}:{3}'.format(chrom, start, stop, strand)

    assert observed == expected
def junction_to_exons(chrom, exon_start_stop, transcripts, strand):
    """Map every junction to the exons found on either side of it.

    Parameters
    ----------
    chrom : str
        Chromosome name, passed through to ``stringify_location``.
    exon_start_stop : mapping
        Indexed by exon identifier; each value unpacks to ``(start, stop)``.
    transcripts : iterable
        Yields ``(transcript, exons)`` pairs. Only ``exons`` is used; it
        must support slicing so consecutive exons can be paired.
    strand : str
        ``'-'`` swaps which exon of a pair is labelled upstream/downstream.

    Returns
    -------
    pandas.DataFrame
        One row per junction with columns ``junction``, ``upstream`` and
        ``downstream``. The last two hold the comma-joined, sorted exon
        location strings on that side of the junction. The three columns
        are present even when ``transcripts`` yields no exon pairs.
    """
    from collections import defaultdict
    from outrigger.index.events import stringify_location

    # The direction labels depend only on the strand, so pick them once.
    if strand == '-':
        exon1_direction, exon2_direction = 'downstream', 'upstream'
    else:
        exon1_direction, exon2_direction = 'upstream', 'downstream'

    exons_by_junction = defaultdict(
        lambda: {'upstream': set(), 'downstream': set()})

    for _, exons in transcripts:
        for exon1, exon2 in zip(exons, exons[1:]):
            start1, stop1 = exon_start_stop[exon1]
            start2, stop2 = exon_start_stop[exon2]
            exon1_location = stringify_location(chrom, start1, stop1,
                                                strand, 'exon')
            exon2_location = stringify_location(chrom, start2, stop2,
                                                strand, 'exon')

            # The junction spans the gap between the two exons; the same
            # coordinates are used regardless of strand.
            junction_location = stringify_location(
                chrom, stop1 + 1, start2 - 1, strand, 'junction')

            adjacent = exons_by_junction[junction_location]
            adjacent[exon1_direction].add(exon1_location)
            adjacent[exon2_direction].add(exon2_location)

    # Sets of strings iterate in an order that can change between
    # interpreter runs (hash randomization), so sort before joining to
    # keep the output reproducible. Building the rows directly also avoids
    # the element-wise ``DataFrame.applymap`` pass, which pandas has
    # deprecated.
    rows = [
        [junction,
         ','.join(sorted(adjacent['upstream'])),
         ','.join(sorted(adjacent['downstream']))]
        for junction, adjacent in exons_by_junction.items()
    ]
    return pd.DataFrame(rows, columns=['junction', 'upstream', 'downstream'])
def junction_exon_triples(chrom, exon_start_stop, transcripts, strand):
    """Return a table linking each exon to its neighbouring junction.

    Parameters
    ----------
    chrom : str
        Chromosome name, passed through to ``stringify_location``.
    exon_start_stop : mapping
        Indexed by exon identifier; each value unpacks to ``(start, stop)``.
    transcripts : iterable
        Yields ``(transcript, exons)`` pairs. Only ``exons`` is used; it
        must support slicing so consecutive exons can be paired.
    strand : str
        ``'-'`` swaps which exon of a pair is labelled upstream/downstream.

    Returns
    -------
    pandas.DataFrame
        Columns ``exon``, ``direction`` and ``junction``; each distinct
        triple appears once.
    """
    from outrigger.index.events import stringify_location

    is_minus = strand == '-'

    def exon_pair_rows():
        """Yield two rows (one per exon) for every consecutive exon pair."""
        for _, exon_ids in transcripts:
            for upper, lower in zip(exon_ids, exon_ids[1:]):
                upper_start, upper_stop = exon_start_stop[upper]
                lower_start, lower_stop = exon_start_stop[lower]

                upper_location = stringify_location(
                    chrom, upper_start, upper_stop, strand, 'exon')
                lower_location = stringify_location(
                    chrom, lower_start, lower_stop, strand, 'exon')

                # Junction coordinates are the gap between the two exons
                # and are computed identically on both strands.
                gap_start = upper_stop + 1
                gap_stop = lower_start - 1
                junction = stringify_location(
                    chrom, gap_start, gap_stop, strand, 'junction')

                yield [upper_location,
                       'downstream' if is_minus else 'upstream',
                       junction]
                yield [lower_location,
                       'upstream' if is_minus else 'downstream',
                       junction]

    table = pd.DataFrame(list(exon_pair_rows()),
                         columns=['exon', 'direction', 'junction'])
    table = table.drop_duplicates()
    return table