Exemple #1
0
def insertions():
    """Return two example insertions in ENSMUSG00000051951 (5 exons).

    Both insertions are on seqname '1' and share the same support values
    (support_junction=1, support_spanning=1, support=2); they differ in
    id, position, strand and sample.
    """
    gene_id = 'ENSMUSG00000051951'

    # Support values shared by both insertions.
    support = {'support_junction': 1, 'support_spanning': 1, 'support': 2}

    # After third exon.
    after_third_exon = Insertion(
        id='1',
        seqname='1',
        position=3207327,
        strand=1,
        metadata=frozendict({'gene_id': gene_id, 'sample': 'S1'}),
        **support)

    # After first exon.
    after_first_exon = Insertion(
        id='2',
        seqname='1',
        position=3214491,
        strand=-1,
        metadata=frozendict({'gene_id': gene_id, 'sample': 'S2'}),
        **support)

    return [after_third_exon, after_first_exon]
Exemple #2
0
def insertions():
    """Return two example insertions in ENSMUSG00000051951 (5 exons).

    The insertions only differ in id, position, strand and sample; all
    other fields are identical.
    """
    # (id, position, strand, sample)
    specs = [
        ('1', 3207327, 1, 'S1'),    # After third exon.
        ('2', 3214491, -1, 'S2'),   # After first exon.
    ]

    return [
        Insertion(
            id=ins_id,
            seqname='1',
            position=position,
            strand=strand,
            support_junction=1,
            support_spanning=1,
            support=2,
            metadata=frozendict({
                'gene_id': 'ENSMUSG00000051951',
                'sample': sample}))
        for ins_id, position, strand, sample in specs
    ]
Exemple #3
0
def insertions():
    """Return an example set of two insertions built with ``_insertion``."""

    first = _insertion(
        id='1',
        seqname='1',
        position=9,
        strand=1,
        metadata=frozendict({'gene_id': 'gene_a', 'sample': 'S1'}))

    second = _insertion(
        id='2',
        seqname='1',
        position=15,
        strand=-1,
        metadata=frozendict({'gene_id': 'gene_b', 'sample': 'S2'}))

    return [first, second]
Exemple #4
0
def insertions():
    """Return an example set of two insertions built with ``_insertion``."""

    # (id, position, strand, gene_id, sample)
    specs = [
        ('1', 9, 1, 'gene_a', 'S1'),
        ('2', 15, -1, 'gene_b', 'S2'),
    ]

    return [
        _insertion(
            id=ins_id,
            seqname='1',
            position=position,
            strand=strand,
            metadata=frozendict({'gene_id': gene_id, 'sample': sample}))
        for ins_id, position, strand, gene_id, sample in specs
    ]
Exemple #5
0
def annotate_fusions_for_genes(fusions, reference):
    # type: (Iterable[Fusion], TranscriptReference) -> Iterable[Fusion]
    """Annotates fusions with genes overlapped by the genomic fusion site.

    A fusion overlapping several genes is yielded once per gene; a fusion
    without any overlapping gene is yielded unchanged.

    Parameters
    ----------
    fusions : iterable[TransposonFusion]
        Fusions to annotate.
    reference : TranscriptReference
        Reference that is queried via ``overlap_genes`` with the
        ``genome_region`` of each fusion.

    Yields
    ------
    Fusion
        Fusions, with ``gene_name``, ``gene_strand`` and ``gene_id``
        merged into their metadata for each overlapped gene.

    """

    for fusion in fusions:
        overlapping = reference.overlap_genes(fusion.genome_region)

        # No overlapping genes: pass the fusion through untouched.
        if len(overlapping) == 0:
            yield fusion
            continue

        for gene in overlapping:
            annotation = {
                'gene_name': gene.name,
                'gene_strand': gene.strand,
                'gene_id': gene.id
            }
            combined = toolz.merge(fusion.metadata, annotation)
            yield fusion._replace(metadata=frozendict(combined))
Exemple #6
0
def _to_fusion_obj(fusion, transposon_name, is_paired):
    """Builds a TransposonFusion from a single fusion record.

    Parameters
    ----------
    fusion
        Record supporting both attribute access (``fusion.seqname_a``) and
        key lookup (``fusion['strand_a']``); presumably a pandas row --
        confirm against the caller.
    transposon_name : str
        Sequence name of the transposon. Only ``seqname_a`` is compared
        against it; if it does not match, side b is taken as the
        transposon side.
    is_paired : bool
        Selects which support columns are read: ``supp_spanning_mates``
        and ``supp_mates`` when true, ``supp_reads`` (with zero spanning
        support) otherwise.

    Returns
    -------
    TransposonFusion
        Fusion with empty metadata.

    """
    transposon_on_a = fusion.seqname_a == transposon_name

    # Side suffix ('a'/'b') and flank sign for the genome and transposon.
    if transposon_on_a:
        genome_side, genome_sign = 'b', 1
        transposon_side, transposon_sign = 'a', -1
    else:
        genome_side, genome_sign = 'a', -1
        transposon_side, transposon_sign = 'b', 1

    def _side_value(field, side):
        """Looks up the ``<field>_<side>`` entry of the fusion record."""
        return fusion[field + '_' + side]

    strand_genome = _side_value('strand', genome_side)
    strand_transposon = _side_value('strand', transposon_side)

    if is_paired:
        support_junction = fusion.supp_spanning_mates
        support_spanning = fusion.supp_mates
    else:
        support_junction, support_spanning = fusion.supp_reads, 0

    seqname = _side_value('seqname', genome_side)
    anchor_genome = _side_value('location', genome_side)
    anchor_transposon = _side_value('location', transposon_side)

    # Flanks are signed by the strand and by a side-dependent direction.
    flank_genome = (
        _side_value('flank', genome_side) * strand_genome * genome_sign)
    flank_transposon = (
        _side_value('flank', transposon_side) * strand_transposon *
        transposon_sign)

    return TransposonFusion(
        seqname=seqname,
        anchor_genome=anchor_genome,
        anchor_transposon=anchor_transposon,
        flank_genome=flank_genome,
        flank_transposon=flank_transposon,
        strand_genome=strand_genome,
        strand_transposon=strand_transposon,
        support_junction=support_junction,
        support_spanning=support_spanning,
        metadata=frozendict())
Exemple #7
0
    def from_fusion(cls, fusion, transposon_name, metadata=None):
        """Converts the fusion to a transposon fusion object.

        Parameters
        ----------
        fusion
            Fusion with ``seqname``, ``location``, ``strand`` and ``flank``
            attributes for both sides (suffixed ``_a`` and ``_b``), plus
            ``support_junction`` and ``support_spanning``.
        transposon_name : str
            Sequence name of the transposon; exactly one side of the
            fusion must be on this sequence.
        metadata : dict
            Optional metadata for the new object (empty if not given).

        Returns
        -------
        TransposonFusion
            The converted fusion. Note that ``TransposonFusion`` is
            constructed directly; ``cls`` is not used.

        Raises
        ------
        ValueError
            If both sides, or neither side, are on the transposon.

        """

        a_is_transposon = fusion.seqname_a == transposon_name
        b_is_transposon = fusion.seqname_b == transposon_name

        if a_is_transposon and b_is_transposon:
            raise ValueError('Fusion does not involve genomic sequence')

        if not (a_is_transposon or b_is_transposon):
            raise ValueError('Fusion does not involve transposon')

        # The flank taken from side a is always multiplied by the negated
        # strand of side a, whichever role that side plays.
        if a_is_transposon:
            transposon_side, genome_side = 'a', 'b'
            flank_transposon = fusion.flank_a * -fusion.strand_a
            flank_genome = fusion.flank_b * fusion.strand_b
        else:
            transposon_side, genome_side = 'b', 'a'
            flank_transposon = fusion.flank_b * fusion.strand_b
            flank_genome = fusion.flank_a * -fusion.strand_a

        def _field(name, side):
            """Reads the ``<name>_<side>`` attribute of the fusion."""
            return getattr(fusion, name + '_' + side)

        return TransposonFusion(
            seqname=_field('seqname', genome_side),
            anchor_genome=_field('location', genome_side),
            anchor_transposon=_field('location', transposon_side),
            strand_genome=_field('strand', genome_side),
            strand_transposon=_field('strand', transposon_side),
            flank_genome=flank_genome,
            flank_transposon=flank_transposon,
            support_junction=fusion.support_junction,
            support_spanning=fusion.support_spanning,
            metadata=frozendict(metadata or {}))
Exemple #8
0
def annotate_fusions_for_genes(fusions, reference):
    # type: (Iterable[Fusion], TranscriptReference) -> Iterable[Fusion]
    """Annotates fusions with genes overlapped by the genomic fusion site.

    Each fusion is yielded once for every gene it overlaps; fusions that
    overlap no gene are yielded as-is.

    Parameters
    ----------
    fusions : iterable[TransposonFusion]
        Fusions to annotate.
    reference : TranscriptReference
        Reference whose ``overlap_genes`` method is called with the
        ``genome_region`` of each fusion.

    Yields
    ------
    Fusion
        Fusions, annotated with overlapped genes (``gene_name``,
        ``gene_strand`` and ``gene_id`` metadata entries).

    """

    def _with_gene(fusion, gene):
        """Returns a copy of the fusion with the gene merged into metadata."""
        gene_fields = {
            'gene_name': gene.name,
            'gene_strand': gene.strand,
            'gene_id': gene.id
        }
        return fusion._replace(
            metadata=frozendict(toolz.merge(fusion.metadata, gene_fields)))

    for fusion in fusions:
        hits = reference.overlap_genes(fusion.genome_region)

        if len(hits) == 0:
            # Nothing overlapped, so there is nothing to annotate.
            yield fusion
            continue

        for gene in hits:
            yield _with_gene(fusion, gene)
Exemple #9
0
    def from_fusion(cls, fusion, transposon_name, metadata=None):
        """Converts the fusion to a transposon fusion object.

        Parameters
        ----------
        fusion
            Fusion with ``seqname``, ``location``, ``strand`` and ``flank``
            attributes for both sides (suffixed ``_a`` and ``_b``), plus
            ``support_junction`` and ``support_spanning``.
        transposon_name : str
            Sequence name of the transposon; exactly one side of the
            fusion must be on this sequence.
        metadata : dict
            Optional metadata for the new object (empty if not given).

        Returns
        -------
        TransposonFusion
            The converted fusion. Note that ``TransposonFusion`` is
            constructed directly; ``cls`` is not used.

        Raises
        ------
        ValueError
            If both sides, or neither side, are on the transposon.

        """

        on_transposon_a = fusion.seqname_a == transposon_name
        on_transposon_b = fusion.seqname_b == transposon_name

        # Exactly one of the two sides has to be the transposon.
        if on_transposon_a and on_transposon_b:
            raise ValueError('Fusion does not involve genomic sequence')
        if not on_transposon_a and not on_transposon_b:
            raise ValueError('Fusion does not involve transposon')

        if on_transposon_a:
            tr_side, gen_side = 'a', 'b'
            tr_flank = fusion.flank_a * -fusion.strand_a
            gen_flank = fusion.flank_b * fusion.strand_b
        else:
            tr_side, gen_side = 'b', 'a'
            tr_flank = fusion.flank_b * fusion.strand_b
            gen_flank = fusion.flank_a * -fusion.strand_a

        # Side-specific attributes, looked up by their '_a'/'_b' suffix.
        seqname = getattr(fusion, 'seqname_' + gen_side)
        anchor_genome = getattr(fusion, 'location_' + gen_side)
        anchor_transposon = getattr(fusion, 'location_' + tr_side)
        strand_genome = getattr(fusion, 'strand_' + gen_side)
        strand_transposon = getattr(fusion, 'strand_' + tr_side)

        return TransposonFusion(
            seqname=seqname,
            anchor_genome=anchor_genome,
            anchor_transposon=anchor_transposon,
            strand_genome=strand_genome,
            strand_transposon=strand_transposon,
            flank_genome=gen_flank,
            flank_transposon=tr_flank,
            support_junction=fusion.support_junction,
            support_spanning=fusion.support_spanning,
            metadata=frozendict(metadata or {}))
Exemple #10
0
    def _to_obj(cls, record):
        """Builds an instance of ``cls`` from a single record.

        Fields of the record that are not listed by ``cls._get_columns()``
        are collected into the ``metadata`` frozendict; all other fields
        are passed to the constructor as keyword arguments.

        Parameters
        ----------
        record
            Object exposing ``_asdict()`` (presumably a named tuple from
            ``DataFrame.itertuples`` -- confirm against the caller).

        Returns
        -------
        cls
            Instance created from the record.

        """
        record_dict = record._asdict()

        # Build the column set once instead of once per record key.
        columns = set(cls._get_columns())

        metadata_fields = [k for k in record_dict if k not in columns]
        metadata = {k: record_dict.pop(k) for k in metadata_fields}

        # An 'Index' entry, if present, is not kept as metadata.
        metadata.pop('Index', None)

        return cls(metadata=frozendict(metadata), **record_dict)
Exemple #11
0
    def _to_obj(cls, record):
        """Builds an instance of ``cls`` from a single record.

        Fields of the record that are not listed by ``cls._get_columns()``
        are collected into the ``metadata`` frozendict; all other fields
        are passed to the constructor as keyword arguments.

        Parameters
        ----------
        record
            Object exposing ``_asdict()`` (presumably a named tuple from
            ``DataFrame.itertuples`` -- confirm against the caller).

        Returns
        -------
        cls
            Instance created from the record.

        """
        record_dict = record._asdict()

        # Build the column set once instead of once per record key.
        columns = set(cls._get_columns())

        metadata_fields = [k for k in record_dict if k not in columns]
        metadata = {k: record_dict.pop(k) for k in metadata_fields}

        # An 'Index' entry, if present, is not kept as metadata.
        metadata.pop('Index', None)

        return cls(metadata=frozendict(metadata), **record_dict)
Exemple #12
0
def rgag1_fusion():
    """Return an example TransposonFusion on seqname 'X'.

    The fusion has a junction support of 10, no spanning support and
    empty metadata.
    """
    fields = {
        'seqname': 'X',
        'anchor_genome': 143093898,
        'anchor_transposon': 1541,
        'strand_genome': 1,
        'strand_transposon': 1,
        'flank_genome': -78,
        'flank_transposon': -76,
        'support_junction': 10,
        'support_spanning': 0,
        'metadata': frozendict({}),
    }
    return TransposonFusion(**fields)
Exemple #13
0
def fusion():
    """Return an example TransposonFusion on seqname '16' without metadata."""
    empty_metadata = frozendict({})

    example = TransposonFusion(
        seqname='16',
        anchor_genome=52141095,
        anchor_transposon=1541,
        strand_genome=-1,
        strand_transposon=1,
        flank_genome=-78,
        flank_transposon=-76,
        support_junction=380,
        support_spanning=118,
        metadata=empty_metadata)

    return example
Exemple #14
0
    def from_transposon_fusion(cls, fusion, id_=None, drop_metadata=None):
        """Converts (annotated) transposon fusion to an insertion.

        Requires the fusion metadata to contain a ``feature_name`` entry,
        i.e. the fusion must have an annotated transposon feature.

        Parameters
        ----------
        fusion : TransposonFusion
            Fusion to convert.
        id_ : str
            ID to use for the insertion.
        drop_metadata : set[str]
            Metadata keys that are not copied to the insertion. Defaults
            to ``{'strand_genome', 'strand_transposon'}``.

        Returns
        -------
        Insertion
            Insertion object derived from the transposon fusion. Its
            strand is the product of the genome and transposon strands.
            Its metadata additionally holds ``transposon_anchor`` and,
            if the fusion metadata has a ``gene_strand`` entry, an
            ``orientation`` of ``'sense'`` or ``'antisense'``.

        Raises
        ------
        ValueError
            If the fusion metadata has no ``feature_name`` entry.

        """

        if drop_metadata is None:
            drop_metadata = {'strand_genome', 'strand_transposon'}

        # Fusion should have annotated transposon feature.
        if 'feature_name' not in fusion.metadata:
            raise ValueError(
                'Fusion does not have an annotated transposon feature')

        strand = fusion.strand_genome * fusion.strand_transposon

        def _is_kept(key):
            """Tells whether a metadata key is carried over."""
            return key not in drop_metadata

        ins_metadata = toolz.keyfilter(_is_kept, fusion.metadata)
        ins_metadata['transposon_anchor'] = fusion.anchor_transposon

        # Orientation is relative to the gene strand, which is looked up
        # in the original (unfiltered) fusion metadata.
        if 'gene_strand' in fusion.metadata:
            same_strand = strand == fusion.metadata['gene_strand']
            ins_metadata['orientation'] = (
                'sense' if same_strand else 'antisense')

        return Insertion(
            id=id_,
            seqname=fusion.seqname,
            position=fusion.anchor_genome,
            strand=strand,
            support_junction=fusion.support_junction,
            support_spanning=fusion.support_spanning,
            support=fusion.support,
            metadata=frozendict(ins_metadata))
Exemple #15
0
    def from_transposon_fusion(cls, fusion, id_=None, drop_metadata=None):
        """Converts (annotated) transposon fusion to an insertion.

        Requires the fusion metadata to contain a ``feature_name`` entry,
        i.e. the fusion must have an annotated transposon feature.

        Parameters
        ----------
        fusion : TransposonFusion
            Fusion to convert.
        id_ : str
            ID to use for the insertion.
        drop_metadata : set[str]
            Metadata keys that are not copied to the insertion. Defaults
            to ``{'strand_genome', 'strand_transposon'}``.

        Returns
        -------
        Insertion
            Insertion object derived from the transposon fusion. Its
            strand is the product of the genome and transposon strands.
            Its metadata additionally holds ``transposon_anchor`` and,
            if the fusion metadata has a ``gene_strand`` entry, an
            ``orientation`` of ``'sense'`` or ``'antisense'``.

        Raises
        ------
        ValueError
            If the fusion metadata has no ``feature_name`` entry.

        """

        source_metadata = fusion.metadata

        if drop_metadata is None:
            drop_metadata = {'strand_genome', 'strand_transposon'}

        # Fusion should have annotated transposon feature.
        if 'feature_name' not in source_metadata:
            raise ValueError(
                'Fusion does not have an annotated transposon feature')

        strand = fusion.strand_genome * fusion.strand_transposon

        ins_metadata = toolz.keyfilter(
            lambda key: key not in drop_metadata, source_metadata)
        ins_metadata['transposon_anchor'] = fusion.anchor_transposon

        # Only fusions annotated with a gene strand get an orientation.
        if 'gene_strand' in source_metadata:
            if strand == source_metadata['gene_strand']:
                ins_metadata['orientation'] = 'sense'
            else:
                ins_metadata['orientation'] = 'antisense'

        return Insertion(
            id=id_,
            seqname=fusion.seqname,
            position=fusion.anchor_genome,
            strand=strand,
            support_junction=fusion.support_junction,
            support_spanning=fusion.support_spanning,
            support=fusion.support,
            metadata=frozendict(ins_metadata))
Exemple #16
0
def annotate_ffpm(fusions, fastq_path):
    # type: (Iterable[Fusion], pathlib.Path) -> Iterable[Fusion]
    """Annotates fusions with FFPM (Fusion Fragments Per Million) score.

    The number of reads is taken as ``count_lines(fastq_path) // 4``; each
    support value of a fusion is scaled by one million over that number.
    As this is a generator, the counting only happens once iteration
    starts.

    Parameters
    ----------
    fusions : iterable[Fusion]
        Fusions to annotate.
    fastq_path : pathlib.Path
        Path to the fastq file whose lines are counted.

    Yields
    ------
    Fusion
        Fusions with ``ffpm_junction``, ``ffpm_spanning`` and ``ffpm``
        merged into their metadata.

    Raises
    ------
    ZeroDivisionError
        If the computed number of reads is zero.

    """

    # Scaling factor from raw support to support per million reads.
    read_count = count_lines(fastq_path) // 4
    per_million = (1.0 / read_count) * 1e6

    for fusion in fusions:
        scores = {
            'ffpm_junction': fusion.support_junction * per_million,
            'ffpm_spanning': fusion.support_spanning * per_million,
            'ffpm': fusion.support * per_million
        }
        annotated = frozendict(toolz.merge(fusion.metadata, scores))
        yield fusion._replace(metadata=annotated)
Exemple #17
0
def annotate_ffpm(fusions, fastq_path):
    # type: (Iterable[Fusion], pathlib.Path) -> Iterable[Fusion]
    """Annotates fusions with FFPM (Fusion Fragments Per Million) score.

    The number of reads is taken as ``count_lines(fastq_path) // 4``; each
    support value of a fusion is scaled by one million over that number.
    As this is a generator, the counting only happens once iteration
    starts.

    Parameters
    ----------
    fusions : iterable[Fusion]
        Fusions to annotate.
    fastq_path : pathlib.Path
        Path to the fastq file whose lines are counted.

    Yields
    ------
    Fusion
        Fusions with ``ffpm_junction``, ``ffpm_spanning`` and ``ffpm``
        merged into their metadata.

    Raises
    ------
    ZeroDivisionError
        If the computed number of reads is zero.

    """

    # Calculate normalization factor.
    total_reads = count_lines(fastq_path) // 4
    scale = (1.0 / total_reads) * 1e6

    def _ffpm_fields(fusion):
        """Returns the scaled support values of a single fusion."""
        return {
            'ffpm_junction': fusion.support_junction * scale,
            'ffpm_spanning': fusion.support_spanning * scale,
            'ffpm': fusion.support * scale
        }

    for fusion in fusions:
        combined = toolz.merge(fusion.metadata, _ffpm_fields(fusion))
        yield fusion._replace(metadata=frozendict(combined))
Exemple #18
0
def _insertion(id,
               seqname,
               position,
               strand,
               support_junction=1,
               support_spanning=1,
               metadata=None):
    """Helper function for building an Insertion instance.

    The total ``support`` is the sum of the junction and spanning support;
    a missing (or empty) ``metadata`` becomes an empty frozendict.
    """
    total_support = support_junction + support_spanning
    frozen_metadata = frozendict(metadata if metadata else {})

    return Insertion(
        id=id,
        seqname=seqname,
        position=position,
        strand=strand,
        support_junction=support_junction,
        support_spanning=support_spanning,
        support=total_support,
        metadata=frozen_metadata)
Exemple #19
0
def _insertion(id,
               seqname,
               position,
               strand,
               support_junction=1,
               support_spanning=1,
               metadata=None):
    """Helper function for building an Insertion instance.

    ``support`` is derived as ``support_junction + support_spanning`` and
    ``metadata`` is wrapped in a frozendict (empty when not given).
    """
    fields = {
        'id': id,
        'seqname': seqname,
        'position': position,
        'strand': strand,
        'support_junction': support_junction,
        'support_spanning': support_spanning,
        'support': support_junction + support_spanning,
        'metadata': frozendict(metadata or {}),
    }
    return Insertion(**fields)
Exemple #20
0
def insertion():
    """Return an example Insertion ('INS_1') with annotated metadata."""
    metadata = frozendict({
        'feature_type': 'SA',
        'feature_name': 'En2SA',
        'gene_strand': 1,
        'orientation': 'antisense',
        'feature_strand': -1,
        'gene_name': 'Cblb',
        'transposon_anchor': 1541
    })

    return Insertion(
        id='INS_1',
        seqname='16',
        position=52141095,
        strand=-1,
        support_junction=380,
        support_spanning=118,
        support=498,
        metadata=metadata)
Exemple #21
0
    def test_example(self, chimeric_data):
        """Test that the simple example yields exactly one known fusion."""

        extracted = star.extract_transposon_fusions(chimeric_data, 'T2onc')
        fusions = list(extracted)

        expected = TransposonFusion(
            seqname='1',
            anchor_genome=300,
            anchor_transposon=420,
            strand_genome=-1,
            strand_transposon=-1,
            flank_genome=-62,
            flank_transposon=100,
            support_junction=4,
            support_spanning=3,
            metadata=frozendict({}))

        assert fusions == [expected]
Exemple #22
0
def tr_fusion():
    """Return an example transposon fusion with gene/feature metadata."""
    annotation = frozendict({
        'gene_strand': 1,
        'gene_name': 'Cblb',
        'feature_type': 'SA',
        'feature_strand': -1,
        'feature_name': 'En2SA'
    })

    return TransposonFusion(
        seqname='16',
        anchor_genome=52141095,
        anchor_transposon=1541,
        strand_genome=-1,
        strand_transposon=1,
        flank_genome=-78,
        flank_transposon=-76,
        support_junction=380,
        support_spanning=118,
        metadata=annotation)
Exemple #23
0
    def test_from_fusion(self, fusion):
        """Tests that from_fusion converts the example fusion correctly."""

        converted = TransposonFusion.from_fusion(fusion, 'T2onc')

        expected_fields = dict(
            seqname='1',
            anchor_genome=300,
            anchor_transposon=420,
            strand_genome=1,
            strand_transposon=1,
            flank_genome=-52,
            flank_transposon=62,
            support_junction=2,
            support_spanning=1,
            metadata=frozendict({}))
        expected = TransposonFusion(**expected_fields)

        assert converted == expected
        # Metadata defaults to an (empty) frozendict when none is passed.
        assert isinstance(converted.metadata, frozendict)
Exemple #24
0
def tr_fusion():
    """Return an example transposon fusion with gene/feature metadata."""
    fields = {
        'seqname': '16',
        'anchor_genome': 52141095,
        'anchor_transposon': 1541,
        'strand_genome': -1,
        'strand_transposon': 1,
        'flank_genome': -78,
        'flank_transposon': -76,
        'support_junction': 380,
        'support_spanning': 118,
    }

    # Gene and transposon feature annotation carried by the fusion.
    metadata = frozendict({
        'gene_strand': 1,
        'gene_name': 'Cblb',
        'feature_type': 'SA',
        'feature_strand': -1,
        'feature_name': 'En2SA'
    })

    return TransposonFusion(metadata=metadata, **fields)
Exemple #25
0
    def test_from_fusion(self, fusion):
        """Tests that from_fusion converts the example fusion correctly."""

        result = TransposonFusion.from_fusion(fusion, 'T2onc')

        no_metadata = frozendict({})
        expected = TransposonFusion(
            seqname='1',
            anchor_genome=300,
            anchor_transposon=420,
            strand_genome=1,
            strand_transposon=1,
            flank_genome=-52,
            flank_transposon=62,
            support_junction=2,
            support_spanning=1,
            metadata=no_metadata)

        assert result == expected
        # Metadata defaults to an (empty) frozendict when none is passed.
        assert isinstance(result.metadata, frozendict)
Exemple #26
0
def annotate_fusions_for_transposon(fusions, feature_path):
    # type: (Iterable[Fusion], pathlib.Path) -> Iterable[Fusion]
    """Annotates fusions with transposon features overlapped by the fusion.

    A fusion overlapping several features is yielded once per feature; a
    fusion without any overlapping feature is yielded unchanged.

    Parameters
    ----------
    fusions : iterable[TransposonFusion]
        Fusions to annotate.
    feature_path : str or pathlib.Path
        Path to TSV file containing transposon features.

    Yields
    ------
    Fusion
        Fusions, annotated with transposon features (``feature_name``,
        ``feature_type`` and ``feature_strand`` metadata entries).

    """

    # Index the features by their (start, end) interval; the feature
    # itself is stored as the third element of each tuple.
    features = TransposonFeature.from_csv(feature_path, sep='\t')
    feature_tree = IntervalTree.from_tuples(
        (feat.start, feat.end, feat) for feat in features)

    for fusion in fusions:
        region_start, region_end = fusion.transposon_region
        hits = [hit[2] for hit in feature_tree[region_start:region_end]]

        # No overlapping features: pass the fusion through untouched.
        if not hits:
            yield fusion
            continue

        for feature in hits:
            annotation = {
                'feature_name': feature.name,
                'feature_type': feature.type,
                'feature_strand': feature.strand
            }
            combined = toolz.merge(fusion.metadata, annotation)
            yield fusion._replace(metadata=frozendict(combined))
Exemple #27
0
def annotate_fusions_for_transposon(fusions, feature_path):
    # type: (Iterable[Fusion], pathlib.Path) -> Iterable[Fusion]
    """Annotates fusions with transposon features overlapped by the fusion.

    Each fusion is yielded once for every feature it overlaps; fusions
    that overlap no feature are yielded as-is.

    Parameters
    ----------
    fusions : iterable[TransposonFusion]
        Fusions to annotate.
    feature_path : str or pathlib.Path
        Path to TSV file containing transposon features.

    Yields
    ------
    Fusion
        Fusions, annotated with transposon features (``feature_name``,
        ``feature_type`` and ``feature_strand`` metadata entries).

    """
    def _overlapping_features(tree, region):
        """Returns the features stored in the tree that overlap region."""
        begin, end = region
        return [interval[2] for interval in tree[begin:end]]

    def _with_feature(fusion, feature):
        """Returns a copy of the fusion with the feature in its metadata."""
        feature_fields = {
            'feature_name': feature.name,
            'feature_type': feature.type,
            'feature_strand': feature.strand
        }
        return fusion._replace(
            metadata=frozendict(toolz.merge(fusion.metadata, feature_fields)))

    # Build the feature tree.
    features = TransposonFeature.from_csv(feature_path, sep='\t')
    tree = IntervalTree.from_tuples(
        (feat.start, feat.end, feat) for feat in features)

    for fusion in fusions:
        matches = _overlapping_features(tree, fusion.transposon_region)

        if not matches:
            yield fusion
            continue

        for feature in matches:
            yield _with_feature(fusion, feature)
Exemple #28
0
    def test_example_file(self, chimeric_junctions_path):
        """Test fusions extracted from the example junctions file."""

        # Read the junctions and extract the (unique) fusions.
        junctions = star.read_chimeric_junctions(chimeric_junctions_path)
        extracted = star.extract_transposon_fusions(junctions, 'T2onc')
        fusions = set(extracted)

        # Check number of fusions.
        assert len(fusions) == 7

        # Check key example.
        key_example = TransposonFusion(
            seqname='16',
            anchor_genome=52141095,
            anchor_transposon=1541,
            strand_genome=-1,
            strand_transposon=1,
            flank_genome=-78,
            flank_transposon=-76,
            support_junction=380,
            support_spanning=118,
            metadata=frozendict({}))

        assert key_example in fusions