Exemple #1
0
def junction_exon_triples(chrom, exon_start_stop, transcripts, strand):
    """Tabulate (exon, direction, junction) rows for adjacent exon pairs.

    Parameters
    ----------
    chrom
        Chromosome name, forwarded unchanged to ``stringify_location``.
    exon_start_stop
        Lookup indexed by exon id that yields a ``(start, stop)`` pair.
    transcripts
        Iterable of ``(transcript, exons)`` pairs; ``exons`` is a sliceable
        sequence of exon ids. Consecutive exons are paired up.
    strand
        Strand symbol. ``'-'`` swaps which exon of a pair is labelled
        ``'upstream'`` and which ``'downstream'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``exon``, ``direction`` and ``junction`` with duplicate
        rows dropped.
    """
    from outrigger.index.events import stringify_location

    # The strand never changes during the call, so choose the labels for
    # the first and second exon of every pair once, up front.
    if strand == '-':
        first_label, second_label = 'downstream', 'upstream'
    else:
        first_label, second_label = 'upstream', 'downstream'

    rows = []
    for _, exon_ids in transcripts:
        for left, right in zip(exon_ids, exon_ids[1:]):
            left_start, left_stop = exon_start_stop[left]
            right_start, right_stop = exon_start_stop[right]

            left_exon = stringify_location(chrom, left_start, left_stop,
                                           strand, 'exon')
            right_exon = stringify_location(chrom, right_start, right_stop,
                                            strand, 'exon')

            # The junction fills the gap between the two exons: it starts
            # one past the first exon's stop and ends one before the second
            # exon's start, regardless of strand.
            junction = stringify_location(chrom, left_stop + 1,
                                          right_start - 1, strand,
                                          'junction')

            rows.append([left_exon, first_label, junction])
            rows.append([right_exon, second_label, junction])

    triples = pd.DataFrame(rows, columns=['exon', 'direction', 'junction'])
    return triples.drop_duplicates()
Exemple #2
0
def graph_items(exon_start_stop, transcripts, chrom, strand):
    """Build the exon/junction graph and the tuple of item locations.

    Every distinct exon or junction location string is assigned an integer
    id equal to its position in the returned ``items`` tuple (ids are handed
    out in order of first appearance). For each new (exon, direction,
    junction) triple two edges are stored in the graph: exon -> junction
    under ``direction`` and junction -> exon under ``opposite(direction)``.

    Parameters
    ----------
    exon_start_stop
        Lookup indexed by exon id that yields a ``(start, stop)`` pair.
    transcripts
        Iterable of ``(transcript, exons)`` pairs; ``exons`` is a sliceable
        sequence of exon ids. Consecutive exons are paired up.
    chrom
        Chromosome name, forwarded unchanged to ``stringify_location``.
    strand
        Strand symbol. ``'-'`` swaps which exon of a pair is labelled
        ``'upstream'`` and which ``'downstream'``.

    Returns
    -------
    tuple
        ``(graph, items)`` where ``graph`` is the object returned by
        ``connect`` and ``items`` is a tuple of location strings whose
        positions are the integer ids used in the graph.
    """
    from outrigger.index.events import stringify_location, opposite

    graph = connect(":memory:", graphs=['upstream', 'downstream'])

    items = []
    # location -> position in `items`. Keeps id lookup O(1) instead of
    # scanning the list with `in` / `.index()` for every exon pair.
    item_index = {}
    triples = set()

    def register(location):
        """Return the id of `location`, assigning the next free id if new."""
        if location not in item_index:
            item_index[location] = len(items)
            items.append(location)
        return item_index[location]

    # The strand is constant for the call, so pick the labels once.
    if strand == '-':
        exon1_direction, exon2_direction = 'downstream', 'upstream'
    else:
        exon1_direction, exon2_direction = 'upstream', 'downstream'

    for transcript, exons in transcripts:
        for exon1, exon2 in zip(exons, exons[1:]):

            start1, stop1 = exon_start_stop[exon1]
            start2, stop2 = exon_start_stop[exon2]
            exon1_location = stringify_location(chrom, start1, stop1, strand,
                                                'exon')
            exon2_location = stringify_location(chrom, start2, stop2, strand,
                                                'exon')

            # The junction fills the gap between the two exons.
            junction_location = stringify_location(chrom, stop1 + 1,
                                                   start2 - 1, strand,
                                                   'junction')

            # Registration order (exon1, exon2, junction) determines the
            # ids, so it must not be changed.
            exon1_i = register(exon1_location)
            exon2_i = register(exon2_location)
            junction_i = register(junction_location)

            candidates = (
                (exon1_location, exon1_i, exon1_direction),
                (exon2_location, exon2_i, exon2_direction),
            )

            with graph.transaction() as tr:
                for exon, exon_i, direction in candidates:
                    exon_triple = (exon, direction, junction_location)
                    if exon_triple in triples:
                        # Both edges for this triple were already stored.
                        continue
                    triples.add(exon_triple)

                    tr.store(getattr(V(exon_i), direction)(junction_i))
                    tr.store(
                        getattr(V(junction_i),
                                opposite(direction))(exon_i))
    return graph, tuple(items)
Exemple #3
0
def graph_items(exon_start_stop, transcripts, chrom, strand):
    """Create the exon/junction graph plus the ordered item locations.

    Each unique exon or junction location string gets an integer id, which
    is its position in the returned ``items`` tuple (first appearance
    order). For every not-yet-seen (exon, direction, junction) triple the
    graph receives two edges: exon -> junction under ``direction`` and
    junction -> exon under ``opposite(direction)``.

    Parameters
    ----------
    exon_start_stop
        Lookup indexed by exon id that yields a ``(start, stop)`` pair.
    transcripts
        Iterable of ``(transcript, exons)`` pairs; ``exons`` is a sliceable
        sequence of exon ids. Consecutive exons are paired up.
    chrom
        Chromosome name, forwarded unchanged to ``stringify_location``.
    strand
        Strand symbol. ``'-'`` swaps which exon of a pair is labelled
        ``'upstream'`` and which ``'downstream'``.

    Returns
    -------
    tuple
        ``(graph, items)``: the object returned by ``connect`` and a tuple
        of location strings indexed by the integer ids used in the graph.
    """
    from outrigger.index.events import stringify_location, opposite

    graph = connect(":memory:", graphs=['upstream', 'downstream'])

    items = []
    # Mirror of `items` as location -> id, so membership tests and id
    # lookups are constant time rather than a linear scan of the list.
    positions = {}
    triples = set()

    def item_id(location):
        """Look up the id of `location`, appending it to `items` if new."""
        found = positions.get(location)
        if found is None:
            found = positions[location] = len(items)
            items.append(location)
        return found

    for transcript, exons in transcripts:
        for exon1, exon2 in zip(exons, exons[1:]):

            start1, stop1 = exon_start_stop[exon1]
            start2, stop2 = exon_start_stop[exon2]
            exon1_location = stringify_location(chrom, start1, stop1, strand,
                                                'exon')
            exon2_location = stringify_location(chrom, start2, stop2, strand,
                                                'exon')

            # Junction runs from one past exon1's stop to one before
            # exon2's start.
            start = stop1 + 1
            stop = start2 - 1
            junction_location = stringify_location(chrom, start, stop, strand,
                                                   'junction')

            # Ids follow first-appearance order: exon1, exon2, junction.
            exon_ids = {
                exon1_location: item_id(exon1_location),
                exon2_location: item_id(exon2_location),
            }
            junction_i = item_id(junction_location)

            if strand == '-':
                exon1_triple = exon1_location, 'downstream', junction_location
                exon2_triple = exon2_location, 'upstream', junction_location
            else:
                exon1_triple = exon1_location, 'upstream', junction_location
                exon2_triple = exon2_location, 'downstream', junction_location

            with graph.transaction() as tr:
                for exon_triple in (exon1_triple, exon2_triple):
                    if exon_triple in triples:
                        # Edges for this triple are already in the graph.
                        continue
                    triples.add(exon_triple)

                    exon, direction, _ = exon_triple
                    exon_i = exon_ids[exon]
                    tr.store(getattr(V(exon_i), direction)(junction_i))
                    tr.store(getattr(V(junction_i), opposite(direction))(
                        exon_i))
    return graph, tuple(items)
Exemple #4
0
def test_stringify_location(chrom, strand, region):
    """Check the string produced by ``stringify_location``.

    With ``region`` set to ``None`` the expected text is
    ``chrom:start-stop:strand``; otherwise the region is prepended as
    ``region:chrom:start-stop:strand``. The three arguments are supplied by
    the test harness (presumably pytest fixtures defined elsewhere).
    """
    from outrigger.index.events import stringify_location

    start, stop = 100, 200

    if region is None:
        expected = '{0}:{1}-{2}:{3}'.format(chrom, start, stop, strand)
    else:
        expected = '{0}:{1}:{2}-{3}:{4}'.format(region, chrom, start, stop,
                                                strand)

    observed = stringify_location(chrom, start, stop, strand, region)
    assert observed == expected
Exemple #5
0
def test_stringify_location(chrom, strand, region):
    """Verify ``stringify_location`` formatting for a fixed 100-200 span.

    Without a region the expected text is ``chrom:100-200:strand``; with a
    region it is ``region:chrom:100-200:strand``. ``chrom``, ``strand`` and
    ``region`` come from outside this function (presumably pytest
    fixtures).
    """
    from outrigger.index.events import stringify_location

    coordinates = (chrom, 100, 200, strand)
    result = stringify_location(coordinates[0], coordinates[1],
                                coordinates[2], coordinates[3], region)

    if region is not None:
        wanted = '{0}:{1}:{2}-{3}:{4}'.format(region, *coordinates)
    else:
        wanted = '{0}:{1}-{2}:{3}'.format(*coordinates)
    assert result == wanted
Exemple #6
0
def junction_to_exons(chrom, exon_start_stop, transcripts, strand):
    """Map every junction to its upstream and downstream exons.

    Parameters
    ----------
    chrom
        Chromosome name, forwarded unchanged to ``stringify_location``.
    exon_start_stop
        Lookup indexed by exon id that yields a ``(start, stop)`` pair.
    transcripts
        Iterable of ``(transcript, exons)`` pairs; ``exons`` is a sliceable
        sequence of exon ids. Consecutive exons are paired up.
    strand
        Strand symbol. ``'-'`` swaps which exon of a pair is recorded as
        ``'upstream'`` and which as ``'downstream'``.

    Returns
    -------
    pandas.DataFrame
        One row per junction with a ``junction`` column plus ``upstream``
        and ``downstream`` columns, each holding the comma-joined exon
        locations in sorted order.
    """
    from collections import defaultdict
    from outrigger.index.events import stringify_location

    neighbors = defaultdict(lambda: {'upstream': set(), 'downstream': set()})

    # The strand is constant for the call, so pick the labels once.
    if strand == '-':
        exon1_side, exon2_side = 'downstream', 'upstream'
    else:
        exon1_side, exon2_side = 'upstream', 'downstream'

    for transcript, exons in transcripts:
        for exon1, exon2 in zip(exons, exons[1:]):

            start1, stop1 = exon_start_stop[exon1]
            start2, stop2 = exon_start_stop[exon2]
            exon1_location = stringify_location(chrom, start1, stop1, strand,
                                                'exon')
            exon2_location = stringify_location(chrom, start2, stop2, strand,
                                                'exon')

            # The junction fills the gap between the two exons.
            junction_location = stringify_location(chrom, stop1 + 1,
                                                   start2 - 1, strand,
                                                   'junction')

            neighbors[junction_location][exon1_side].add(exon1_location)
            neighbors[junction_location][exon2_side].add(exon2_location)

    # Join each exon set into one string before building the frame.
    # Sorting makes the text reproducible (set iteration order of strings
    # can differ between interpreter runs), and joining here avoids the
    # deprecated DataFrame.applymap.
    joined = {}
    for junction, sides in neighbors.items():
        joined[junction] = {side: ','.join(sorted(exon_set))
                            for side, exon_set in sides.items()}

    data = pd.DataFrame(joined).T
    data = data.reset_index()
    data = data.rename(columns={'index': 'junction'})
    return data
Exemple #7
0
def junction_to_exons(chrom, exon_start_stop, transcripts, strand):
    """Build a junction -> (upstream exons, downstream exons) table.

    Parameters
    ----------
    chrom
        Chromosome name, forwarded unchanged to ``stringify_location``.
    exon_start_stop
        Lookup indexed by exon id that yields a ``(start, stop)`` pair.
    transcripts
        Iterable of ``(transcript, exons)`` pairs; ``exons`` is a sliceable
        sequence of exon ids. Consecutive exons are paired up.
    strand
        Strand symbol. ``'-'`` swaps which exon of a pair is recorded as
        ``'upstream'`` and which as ``'downstream'``.

    Returns
    -------
    pandas.DataFrame
        One row per junction: a ``junction`` column, and ``upstream`` /
        ``downstream`` columns containing the exon locations joined with
        commas in sorted order.
    """
    from collections import defaultdict
    from outrigger.index.events import stringify_location

    data = defaultdict(lambda: {'upstream': set(), 'downstream': set()})

    for transcript, exons in transcripts:
        for exon1, exon2 in zip(exons, exons[1:]):

            start1, stop1 = exon_start_stop[exon1]
            start2, stop2 = exon_start_stop[exon2]
            exon1_location = stringify_location(chrom, start1, stop1,
                                                strand, 'exon')
            exon2_location = stringify_location(chrom, start2, stop2,
                                                strand, 'exon')

            # Junction runs from one past exon1's stop to one before
            # exon2's start, regardless of strand.
            start = stop1 + 1
            stop = start2 - 1

            junction_location = stringify_location(chrom, start,
                                                   stop, strand, 'junction')

            exon_sets = data[junction_location]
            if strand == '-':
                exon_sets['downstream'].add(exon1_location)
                exon_sets['upstream'].add(exon2_location)
            else:
                exon_sets['upstream'].add(exon1_location)
                exon_sets['downstream'].add(exon2_location)

    # Turn every exon set into a comma-joined string up front. Sorting
    # gives the same text on every run (iteration order of a set of strings
    # is not stable across interpreter runs), and doing the join here means
    # the deprecated DataFrame.applymap is no longer needed.
    as_text = {}
    for junction, exon_sets in data.items():
        row = {}
        for direction, exon_set in exon_sets.items():
            row[direction] = ','.join(sorted(exon_set))
        as_text[junction] = row

    table = pd.DataFrame(as_text).T
    table = table.reset_index()
    return table.rename(columns={'index': 'junction'})
Exemple #8
0
def junction_exon_triples(chrom, exon_start_stop, transcripts, strand):
    """Return a DataFrame of exon/direction/junction triples.

    Parameters
    ----------
    chrom
        Chromosome name, forwarded unchanged to ``stringify_location``.
    exon_start_stop
        Lookup indexed by exon id that yields a ``(start, stop)`` pair.
    transcripts
        Iterable of ``(transcript, exons)`` pairs; ``exons`` is a sliceable
        sequence of exon ids. Consecutive exons are paired up.
    strand
        Strand symbol. On ``'-'`` the first exon of each pair is labelled
        ``'downstream'`` and the second ``'upstream'``; otherwise the
        labels are the other way round.

    Returns
    -------
    pandas.DataFrame
        Columns ``exon``, ``direction``, ``junction``; duplicates removed.
    """
    from outrigger.index.events import stringify_location

    # Labels for (first exon, second exon) of a pair; fixed for the call.
    labels = ('upstream', 'downstream')
    if strand == '-':
        labels = ('downstream', 'upstream')

    records = []
    for _, exon_names in transcripts:
        for name_a, name_b in zip(exon_names, exon_names[1:]):
            a_start, a_stop = exon_start_stop[name_a]
            b_start, b_stop = exon_start_stop[name_b]

            exon_locations = (
                stringify_location(chrom, a_start, a_stop, strand, 'exon'),
                stringify_location(chrom, b_start, b_stop, strand, 'exon'),
            )

            # The junction covers the gap between the exons: one past the
            # first stop through one before the second start.
            junction_location = stringify_location(
                chrom, a_stop + 1, b_start - 1, strand, 'junction')

            for exon_location, label in zip(exon_locations, labels):
                records.append([exon_location, label, junction_location])

    frame = pd.DataFrame(records, columns=['exon', 'direction', 'junction'])
    return frame.drop_duplicates()