Beispiel #1
0
def extract_transposon_fusions(chimeric_data,
                               transposon_name,
                               merge_junction_dist=10,
                               max_spanning_dist=300,
                               max_junction_dist=10000):
    """Yield transposon fusions found in a STAR chimeric read dataframe.

    Only rows for which exactly one of ``seqname_a`` / ``seqname_b`` equals
    ``transposon_name`` are used. Junction fusions are extracted first,
    spanning reads are then assigned to them, and the reads left unassigned
    are used to extract additional spanning fusions.

    Args:
        chimeric_data: Dataframe with (at least) ``seqname_a`` and
            ``seqname_b`` columns.
        transposon_name: Sequence name of the transposon.
        merge_junction_dist: Passed as ``merge_dist`` to
            ``extract_junction_fusions``.
        max_spanning_dist: Passed as ``max_dist_left`` to
            ``assign_spanning_reads`` and as ``max_dist`` to
            ``extract_spanning_fusions``.
        max_junction_dist: Passed as ``max_dist_right`` to
            ``assign_spanning_reads``.

    Yields:
        One ``TransposonFusion`` per fusion, junction fusions first and
        spanning fusions after.
    """

    # Exclusive-or: keep rows where exactly one side is the transposon.
    side_a_is_transposon = chimeric_data['seqname_a'] == transposon_name
    side_b_is_transposon = chimeric_data['seqname_b'] == transposon_name
    transposon_reads = normalize_chimeric_junctions(
        chimeric_data.loc[side_a_is_transposon ^ side_b_is_transposon],
        seqname=transposon_name)

    # Junction-level fusions; materialized because they are handed to
    # assign_spanning_reads below.
    junction_fusions = list(
        extract_junction_fusions(transposon_reads,
                                 merge_dist=merge_junction_dist))

    # Attach spanning reads to the junctions, keeping the leftover reads.
    junction_fusions, leftover_reads = assign_spanning_reads(
        junction_fusions,
        transposon_reads,
        max_dist_left=max_spanning_dist,
        max_dist_right=max_junction_dist)

    # Leftover reads can still form fusions of their own.
    spanning_fusions = extract_spanning_fusions(leftover_reads,
                                                max_dist=max_spanning_dist)

    for candidate in itertools.chain(junction_fusions, spanning_fusions):
        yield TransposonFusion.from_fusion(candidate, transposon_name)
Beispiel #2
0
def _to_fusion_obj(fusion, transposon_name, is_paired):
    """Build a TransposonFusion from a two-sided fusion record.

    The side ('a' or 'b') whose seqname equals ``transposon_name`` is taken
    as the transposon side (side 'b' when side 'a' does not match); the
    other side is the genomic side. Each flank is multiplied by its strand
    and by a side-dependent sign: -1 for side 'a', +1 for side 'b'.

    Args:
        fusion: Record supporting both attribute access (``seqname_a``,
            ``supp_*``) and item access (``'seqname_a'``, ``'location_a'``,
            ``'flank_a'``, ``'strand_a'`` and the ``_b`` counterparts).
        transposon_name: Sequence name of the transposon.
        is_paired: If truthy, junction support is read from
            ``supp_spanning_mates`` and spanning support from ``supp_mates``;
            otherwise junction support is ``supp_reads`` and spanning
            support is 0.

    Returns:
        A ``TransposonFusion`` with empty metadata.
    """
    # Sign applied to the flank of each side.
    side_sign = {'a': -1, 'b': 1}

    if fusion.seqname_a == transposon_name:
        genome_side, transposon_side = 'b', 'a'
    else:
        genome_side, transposon_side = 'a', 'b'

    def field(prefix, side):
        # Look up e.g. 'strand_a' / 'flank_b' on the record.
        return fusion[prefix + '_' + side]

    genome_strand = field('strand', genome_side)
    transposon_strand = field('strand', transposon_side)

    if is_paired:
        junction_support, spanning_support = (fusion.supp_spanning_mates,
                                              fusion.supp_mates)
    else:
        junction_support, spanning_support = fusion.supp_reads, 0

    genome_flank = (field('flank', genome_side) * genome_strand *
                    side_sign[genome_side])
    transposon_flank = (field('flank', transposon_side) * transposon_strand *
                        side_sign[transposon_side])

    return TransposonFusion(
        seqname=field('seqname', genome_side),
        anchor_genome=field('location', genome_side),
        anchor_transposon=field('location', transposon_side),
        flank_genome=genome_flank,
        flank_transposon=transposon_flank,
        strand_genome=genome_strand,
        strand_transposon=transposon_strand,
        support_junction=junction_support,
        support_spanning=spanning_support,
        metadata=frozendict())
Beispiel #3
0
def extract_transposon_fusions(chimeric_data,
                               transposon_name,
                               merge_junction_dist=10,
                               max_spanning_dist=300,
                               max_junction_dist=10000):
    """Yield transposon fusions found in a STAR chimeric read dataframe.

    Rows are kept only when exactly one of ``seqname_a`` / ``seqname_b``
    equals ``transposon_name``. Junction fusions are extracted from those
    rows, spanning reads are assigned to the junctions, and whatever reads
    stay unassigned are used to extract further spanning fusions.

    Args:
        chimeric_data: Dataframe with (at least) ``seqname_a`` and
            ``seqname_b`` columns.
        transposon_name: Sequence name of the transposon.
        merge_junction_dist: Forwarded to ``extract_junction_fusions`` as
            ``merge_dist``.
        max_spanning_dist: Forwarded to ``assign_spanning_reads`` as
            ``max_dist_left`` and to ``extract_spanning_fusions`` as
            ``max_dist``.
        max_junction_dist: Forwarded to ``assign_spanning_reads`` as
            ``max_dist_right``.

    Yields:
        ``TransposonFusion`` objects: junction fusions first, then spanning
        fusions.
    """

    # XOR of the two masks selects rows with the transposon on one side only.
    hits_a = chimeric_data['seqname_a'] == transposon_name
    hits_b = chimeric_data['seqname_b'] == transposon_name
    selected = chimeric_data.loc[hits_a ^ hits_b]
    normalized = normalize_chimeric_junctions(selected,
                                              seqname=transposon_name)

    # Junction fusions are materialized before spanning reads are assigned.
    junction_iter = extract_junction_fusions(normalized,
                                             merge_dist=merge_junction_dist)
    with_junction = list(junction_iter)

    with_junction, remaining = assign_spanning_reads(
        with_junction,
        normalized,
        max_dist_left=max_spanning_dist,
        max_dist_right=max_junction_dist)

    # Reads not assigned to any junction may form spanning-only fusions.
    spanning_only = extract_spanning_fusions(remaining,
                                             max_dist=max_spanning_dist)

    for item in itertools.chain(with_junction, spanning_only):
        yield TransposonFusion.from_fusion(item, transposon_name)
Beispiel #4
0
    def test_from_fusion(self, fusion):
        """Check the TransposonFusion built by from_fusion for 'T2onc'."""

        converted = TransposonFusion.from_fusion(fusion, 'T2onc')

        # Field values the converted fusion is expected to carry.
        expected_fields = {
            'seqname': '1',
            'anchor_genome': 300,
            'anchor_transposon': 420,
            'strand_genome': 1,
            'strand_transposon': 1,
            'flank_genome': -52,
            'flank_transposon': 62,
            'support_junction': 2,
            'support_spanning': 1,
            'metadata': frozendict({}),
        }
        reference = TransposonFusion(**expected_fields)

        assert converted == reference
        # Metadata must be the immutable mapping type, not a plain dict.
        assert isinstance(converted.metadata, frozendict)
Beispiel #5
0
def rgag1_fusion():
    """Return an example TransposonFusion on seqname 'X' with no metadata."""
    fields = {
        'seqname': 'X',
        'anchor_genome': 143093898,
        'anchor_transposon': 1541,
        'strand_genome': 1,
        'strand_transposon': 1,
        'flank_genome': -78,
        'flank_transposon': -76,
        'support_junction': 10,
        'support_spanning': 0,
        'metadata': frozendict({}),
    }
    return TransposonFusion(**fields)
Beispiel #6
0
def fusion():
    """Return an example TransposonFusion on seqname '16', empty metadata."""
    example = TransposonFusion(
        # Location of the fusion.
        seqname='16',
        anchor_genome=52141095,
        anchor_transposon=1541,
        # Orientation and flank sizes.
        strand_genome=-1,
        strand_transposon=1,
        flank_genome=-78,
        flank_transposon=-76,
        # Supporting evidence counts.
        support_junction=380,
        support_spanning=118,
        metadata=frozendict({}))
    return example
Beispiel #7
0
    def test_example(self, chimeric_data):
        """Check that the simple example yields exactly one known fusion."""

        extracted = list(
            star.extract_transposon_fusions(chimeric_data, 'T2onc'))

        # The single fusion the example data should produce.
        only_fusion = TransposonFusion(
            seqname='1',
            anchor_genome=300,
            anchor_transposon=420,
            strand_genome=-1,
            strand_transposon=-1,
            flank_genome=-62,
            flank_transposon=100,
            support_junction=4,
            support_spanning=3,
            metadata=frozendict({}))

        assert extracted == [only_fusion]
Beispiel #8
0
def tr_fusion():
    """Return an example TransposonFusion carrying gene/feature metadata."""
    annotation = frozendict({
        'gene_strand': 1,
        'gene_name': 'Cblb',
        'feature_type': 'SA',
        'feature_strand': -1,
        'feature_name': 'En2SA'
    })
    fields = {
        'seqname': '16',
        'anchor_genome': 52141095,
        'anchor_transposon': 1541,
        'strand_genome': -1,
        'strand_transposon': 1,
        'flank_genome': -78,
        'flank_transposon': -76,
        'support_junction': 380,
        'support_spanning': 118,
        'metadata': annotation,
    }
    return TransposonFusion(**fields)
Beispiel #9
0
    def test_from_fusion(self, fusion):
        """Verify from_fusion output against a hand-built TransposonFusion."""

        result = TransposonFusion.from_fusion(fusion, 'T2onc')

        # Hand-built counterpart of what the conversion should return.
        wanted = TransposonFusion(
            metadata=frozendict({}),
            support_spanning=1,
            support_junction=2,
            flank_transposon=62,
            flank_genome=-52,
            strand_transposon=1,
            strand_genome=1,
            anchor_transposon=420,
            anchor_genome=300,
            seqname='1')

        assert result == wanted
        # The metadata container itself must be a frozendict.
        assert isinstance(result.metadata, frozendict)
Beispiel #10
0
    def test_example_file(self, chimeric_junctions_path):
        """Check fusions extracted from the example chimeric junctions file."""

        # Read the file and collect the distinct fusions for 'T2onc'.
        junction_frame = star.read_chimeric_junctions(chimeric_junctions_path)
        found = set(star.extract_transposon_fusions(junction_frame, 'T2onc'))

        # The example file should give seven distinct fusions.
        assert len(found) == 7

        # One specific fusion that must be among them.
        key_fields = {
            'seqname': '16',
            'anchor_genome': 52141095,
            'anchor_transposon': 1541,
            'strand_genome': -1,
            'strand_transposon': 1,
            'flank_genome': -78,
            'flank_transposon': -76,
            'support_junction': 380,
            'support_spanning': 118,
            'metadata': frozendict({}),
        }
        key_fusion = TransposonFusion(**key_fields)
        assert key_fusion in found