def insertions():
    """Return two example insertions in ENSMUSG00000051951 (5 exons).

    Both insertions lie on seqname '1', carry one junction and one spanning
    read (total support of 2) and are tagged with the gene id and a sample.
    """
    gene_id = 'ENSMUSG00000051951'

    # Insertion after the third exon (strand 1, sample S1).
    after_third_exon = Insertion(
        id='1',
        seqname='1',
        position=3207327,
        strand=1,
        support_junction=1,
        support_spanning=1,
        support=2,
        metadata=frozendict({'gene_id': gene_id, 'sample': 'S1'}))

    # Insertion after the first exon (strand -1, sample S2).
    after_first_exon = Insertion(
        id='2',
        seqname='1',
        position=3214491,
        strand=-1,
        support_junction=1,
        support_spanning=1,
        support=2,
        metadata=frozendict({'gene_id': gene_id, 'sample': 'S2'}))

    return [after_third_exon, after_first_exon]  # yapf: disable
def insertions():
    """Return a small example insertion set (two insertions on seqname '1').

    The first insertion is annotated with ``gene_a``/sample ``S1``, the
    second with ``gene_b``/sample ``S2``. Both are built with ``_insertion``.
    """
    first = _insertion(
        id='1',
        seqname='1',
        position=9,
        strand=1,
        metadata=frozendict({'gene_id': 'gene_a', 'sample': 'S1'}))

    second = _insertion(
        id='2',
        seqname='1',
        position=15,
        strand=-1,
        metadata=frozendict({'gene_id': 'gene_b', 'sample': 'S2'}))

    return [first, second]  # yapf: disable
def annotate_fusions_for_genes(fusions, reference):
    # type: (Iterable[Fusion], TranscriptReference) -> Iterable[Fusion]
    """Annotates fusions with genes overlapped by the genomic fusion site.

    For every gene returned by ``reference.overlap_genes`` a copy of the
    fusion is yielded whose metadata additionally holds ``gene_name``,
    ``gene_strand`` and ``gene_id``. A fusion that overlaps several genes is
    therefore yielded once per gene; a fusion without any overlapping gene
    is yielded unchanged.

    Parameters
    ----------
    fusions : iterable[TransposonFusion]
        Fusions to annotate.
    reference : TranscriptReference
        Reference that is queried (through ``overlap_genes``) with the
        ``genome_region`` of each fusion.

    Yields
    ------
    Fusion
        Fusions, annotated with overlapped genes.

    """

    for fusion in fusions:
        overlapping = reference.overlap_genes(fusion.genome_region)

        # Nothing overlaps: pass the fusion through untouched.
        if not len(overlapping) > 0:
            yield fusion
            continue

        for gene in overlapping:
            annotation = {
                'gene_name': gene.name,
                'gene_strand': gene.strand,
                'gene_id': gene.id
            }
            # Gene keys take precedence over existing metadata keys.
            combined = toolz.merge(fusion.metadata, annotation)
            yield fusion._replace(metadata=frozendict(combined))
def _to_fusion_obj(fusion, transposon_name, is_paired):
    """Converts a single fusion record into a TransposonFusion.

    The record has an 'a' and a 'b' side. The side whose seqname equals
    ``transposon_name`` (checked on side 'a' only) is treated as the
    transposon; the other side is treated as the genome.

    Parameters
    ----------
    fusion
        Record supporting both attribute access (``fusion.seqname_a``) and
        item access (``fusion['strand_a']``).
    transposon_name : str
        Name of the transposon sequence.
    is_paired : bool
        If true, support is read from ``supp_spanning_mates`` (junction)
        and ``supp_mates`` (spanning). Otherwise junction support comes from
        ``supp_reads`` and spanning support is 0.

    Returns
    -------
    TransposonFusion
        Fusion with empty metadata.

    """

    def _field(prefix, side):
        # Looks up a side-specific column such as 'strand_a'.
        return fusion[prefix + '_' + side]

    # Decide which side is genomic and the sign applied to each flank.
    if fusion.seqname_a == transposon_name:
        genome_side, transposon_side = 'b', 'a'
        genome_sign, transposon_sign = 1, -1
    else:
        genome_side, transposon_side = 'a', 'b'
        genome_sign, transposon_sign = -1, 1

    strand_genome = _field('strand', genome_side)
    strand_transposon = _field('strand', transposon_side)

    if not is_paired:
        junction_support, spanning_support = fusion.supp_reads, 0
    else:
        junction_support = fusion.supp_spanning_mates
        spanning_support = fusion.supp_mates

    # Flanks are oriented by the strand of their side and the side's sign.
    genome_flank = (
        _field('flank', genome_side) * strand_genome * genome_sign)
    transposon_flank = (
        _field('flank', transposon_side) * strand_transposon *
        transposon_sign)

    return TransposonFusion(
        seqname=_field('seqname', genome_side),
        anchor_genome=_field('location', genome_side),
        anchor_transposon=_field('location', transposon_side),
        flank_genome=genome_flank,
        flank_transposon=transposon_flank,
        strand_genome=strand_genome,
        strand_transposon=strand_transposon,
        support_junction=junction_support,
        support_spanning=spanning_support,
        metadata=frozendict())
def from_fusion(cls, fusion, transposon_name, metadata=None):
    """Converts the fusion to a transposon fusion object.

    Exactly one of the two fusion sides ('a' or 'b') must lie on the
    transposon sequence; that side supplies the transposon anchor, strand
    and flank, while the other side supplies the genomic ones.

    Parameters
    ----------
    fusion
        Fusion exposing ``seqname_*``, ``location_*``, ``strand_*`` and
        ``flank_*`` attributes for sides 'a' and 'b', plus
        ``support_junction`` and ``support_spanning``.
    transposon_name : str
        Name of the transposon sequence.
    metadata : dict, optional
        Metadata to attach to the result (defaults to empty).

    Returns
    -------
    TransposonFusion
        The converted fusion.

    Raises
    ------
    ValueError
        If both sides are on the transposon, or if neither side is.

    """

    a_on_transposon = fusion.seqname_a == transposon_name
    b_on_transposon = fusion.seqname_b == transposon_name

    if a_on_transposon and b_on_transposon:
        raise ValueError('Fusion does not involve genomic sequence')

    if not (a_on_transposon or b_on_transposon):
        raise ValueError('Fusion does not involve transposon')

    # Side 'a' flanks are negated by strand, side 'b' flanks are not.
    flank_a = fusion.flank_a * -fusion.strand_a
    flank_b = fusion.flank_b * fusion.strand_b

    if a_on_transposon:
        transposon_side, genome_side = 'a', 'b'
        transposon_flank, genome_flank = flank_a, flank_b
    else:
        transposon_side, genome_side = 'b', 'a'
        transposon_flank, genome_flank = flank_b, flank_a

    def _pick(prefix, side):
        # Fetches a side-specific attribute such as ``location_a``.
        return getattr(fusion, prefix + side)

    return TransposonFusion(
        seqname=_pick('seqname_', genome_side),
        anchor_genome=_pick('location_', genome_side),
        anchor_transposon=_pick('location_', transposon_side),
        strand_genome=_pick('strand_', genome_side),
        strand_transposon=_pick('strand_', transposon_side),
        flank_genome=genome_flank,
        flank_transposon=transposon_flank,
        support_junction=fusion.support_junction,
        support_spanning=fusion.support_spanning,
        metadata=frozendict(metadata or {}))
def _to_obj(cls, record):
    """Builds a ``cls`` instance from a named-tuple style record.

    Fields of the record that are not listed by ``cls._get_columns()`` are
    collected into the instance's ``metadata`` (as a frozendict); the
    remaining fields are passed to ``cls`` as keyword arguments. An
    ``'Index'`` entry, if present among the extra fields, is discarded.

    Parameters
    ----------
    record
        Object providing ``_asdict()`` (e.g. a namedtuple row).

    Returns
    -------
    cls
        Instance constructed from the record.

    """
    record_dict = record._asdict()

    # Build the column lookup once instead of once per inspected key.
    columns = set(cls._get_columns())

    # Iterate over a snapshot of the keys, as entries are popped on the go.
    metadata = {
        key: record_dict.pop(key)
        for key in list(record_dict) if key not in columns
    }
    metadata.pop('Index', None)

    return cls(metadata=frozendict(metadata), **record_dict)
def rgag1_fusion():
    """Return an example transposon fusion on seqname 'X'.

    The fusion has junction support only (no spanning support) and carries
    empty metadata.
    """
    fields = dict(
        seqname='X',
        anchor_genome=143093898,
        anchor_transposon=1541,
        strand_genome=1,
        strand_transposon=1,
        flank_genome=-78,
        flank_transposon=-76,
        support_junction=10,
        support_spanning=0)

    return TransposonFusion(metadata=frozendict({}), **fields)
def fusion():
    """Return an example (unannotated) transposon fusion on seqname '16'."""
    fields = dict(
        seqname='16',
        anchor_genome=52141095,
        anchor_transposon=1541,
        strand_genome=-1,
        strand_transposon=1,
        flank_genome=-78,
        flank_transposon=-76,
        support_junction=380,
        support_spanning=118)

    # No annotation has been added yet, hence the empty metadata.
    return TransposonFusion(metadata=frozendict({}), **fields)
def from_transposon_fusion(cls, fusion, id_=None, drop_metadata=None):
    """Converts (annotated) transposon fusion to an insertion.

    Requires the fusion to have been annotated with a transposon feature,
    i.e. its metadata must contain a ``feature_name`` entry.

    Parameters
    ----------
    fusion : TransposonFusion
        Fusion to convert.
    id_ : str
        ID to use for the insertion.
    drop_metadata : set[str], optional
        Metadata keys that are not copied to the insertion. Defaults to
        ``{'strand_genome', 'strand_transposon'}``.

    Returns
    -------
    Insertion
        Insertion object derived from the transposon fusion.

    Raises
    ------
    ValueError
        If the fusion metadata has no ``feature_name`` entry.

    """

    if drop_metadata is None:
        drop_metadata = {'strand_genome', 'strand_transposon'}

    # Fusion should have annotated transposon feature.
    if 'feature_name' not in fusion.metadata:
        raise ValueError(
            'Fusion does not have an annotated transposon feature')

    # Insertion strand is the product of the genome/transposon strands.
    strand = fusion.strand_genome * fusion.strand_transposon

    def _is_kept(key):
        return key not in drop_metadata

    ins_metadata = toolz.keyfilter(_is_kept, fusion.metadata)
    ins_metadata['transposon_anchor'] = fusion.anchor_transposon

    # Orientation is only defined when the gene strand is known.
    if 'gene_strand' in fusion.metadata:
        if strand == fusion.metadata['gene_strand']:
            ins_metadata['orientation'] = 'sense'
        else:
            ins_metadata['orientation'] = 'antisense'

    return Insertion(
        id=id_,
        seqname=fusion.seqname,
        position=fusion.anchor_genome,
        strand=strand,
        support_junction=fusion.support_junction,
        support_spanning=fusion.support_spanning,
        support=fusion.support,
        metadata=frozendict(ins_metadata))
def from_transposon_fusion(cls, fusion, id_=None, drop_metadata=None):
    """Converts (annotated) transposon fusion to an insertion.

    The fusion must carry a ``feature_name`` metadata entry (i.e. it must
    have been annotated with a transposon feature). If the metadata also
    contains ``gene_strand``, an ``orientation`` ('sense' or 'antisense')
    is added to the insertion metadata.

    Parameters
    ----------
    fusion : TransposonFusion
        Fusion to convert.
    id_ : str
        ID to use for the insertion.
    drop_metadata : set[str], optional
        Fusion metadata keys to leave out of the insertion metadata. If
        None, ``strand_genome`` and ``strand_transposon`` are dropped.

    Returns
    -------
    Insertion
        Insertion object derived from the transposon fusion.

    Raises
    ------
    ValueError
        If the fusion lacks an annotated transposon feature.

    """

    excluded = drop_metadata
    if excluded is None:
        excluded = {'strand_genome', 'strand_transposon'}

    source_meta = fusion.metadata

    # Fusion should have annotated transposon feature.
    if 'feature_name' not in source_meta:
        raise ValueError(
            'Fusion does not have an annotated transposon feature')

    strand = fusion.strand_genome * fusion.strand_transposon

    # Copy over all metadata except the excluded keys.
    ins_metadata = {
        key: value
        for key, value in source_meta.items() if key not in excluded
    }
    ins_metadata['transposon_anchor'] = fusion.anchor_transposon

    if 'gene_strand' in source_meta:
        same_strand = strand == source_meta['gene_strand']
        ins_metadata['orientation'] = 'sense' if same_strand else 'antisense'

    return Insertion(
        id=id_,
        seqname=fusion.seqname,
        position=fusion.anchor_genome,
        strand=strand,
        support_junction=fusion.support_junction,
        support_spanning=fusion.support_spanning,
        support=fusion.support,
        metadata=frozendict(ins_metadata))
def annotate_ffpm(fusions, fastq_path):
    # type: (Iterable[Fusion], pathlib.Path) -> Iterable[Fusion]
    """Annotates fusions with FFPM (Fusion Fragments Per Million) score.

    Parameters
    ----------
    fusions : iterable[Fusion]
        Fusions to annotate.
    fastq_path : pathlib.Path
        Path to the fastq file whose line count is used to derive the
        number of reads.

    Yields
    ------
    Fusion
        Copies of the fusions with ``ffpm_junction``, ``ffpm_spanning``
        and ``ffpm`` merged into their metadata.

    """

    # Read count is the line count divided by four (this assumes
    # four-line fastq records); scale supports to a per-million basis.
    n_reads = count_lines(fastq_path) // 4
    per_million = (1.0 / n_reads) * 1e6

    for fusion in fusions:
        supports = (
            ('ffpm_junction', fusion.support_junction),
            ('ffpm_spanning', fusion.support_spanning),
            ('ffpm', fusion.support),
        )
        scores = {key: count * per_million for key, count in supports}

        annotated_meta = toolz.merge(fusion.metadata, scores)
        yield fusion._replace(metadata=frozendict(annotated_meta))
def _insertion(id,
               seqname,
               position,
               strand,
               support_junction=1,
               support_spanning=1,
               metadata=None):
    """Helper function for building an Insertion instance.

    The total ``support`` is derived as the sum of the junction and spanning
    support, and a missing ``metadata`` is replaced by an empty frozendict.
    """
    total_support = support_junction + support_spanning
    frozen_meta = frozendict(metadata or {})

    return Insertion(
        id=id,
        seqname=seqname,
        position=position,
        strand=strand,
        support_junction=support_junction,
        support_spanning=support_spanning,
        support=total_support,
        metadata=frozen_meta)
def _insertion(id,
               seqname,
               position,
               strand,
               support_junction=1,
               support_spanning=1,
               metadata=None):
    """Helper function for building an Insertion instance.

    Fills in ``support`` as ``support_junction + support_spanning`` and
    wraps ``metadata`` (or an empty dict if none is given) in a frozendict.
    """
    fields = dict(
        id=id,
        seqname=seqname,
        position=position,
        strand=strand,
        support_junction=support_junction,
        support_spanning=support_spanning)

    # Derived fields that callers do not have to supply themselves.
    fields['support'] = support_junction + support_spanning
    fields['metadata'] = frozendict(metadata or {})

    return Insertion(**fields)
def insertion():
    """Return an example annotated insertion (id 'INS_1', seqname '16')."""
    annotation = frozendict({
        'feature_type': 'SA',
        'feature_name': 'En2SA',
        'gene_strand': 1,
        'orientation': 'antisense',
        'feature_strand': -1,
        'gene_name': 'Cblb',
        'transposon_anchor': 1541
    })

    # Total support (498) is junction (380) plus spanning (118) support.
    return Insertion(
        id='INS_1',
        seqname='16',
        position=52141095,
        strand=-1,
        support_junction=380,
        support_spanning=118,
        support=498,
        metadata=annotation)
def test_example(self, chimeric_data):
    """Test extraction on the simple example data (single fusion)."""
    observed = list(star.extract_transposon_fusions(chimeric_data, 'T2onc'))

    # Exactly one fusion with 'T2onc' is expected in the example data.
    expected_fusion = TransposonFusion(
        seqname='1',
        anchor_genome=300,
        anchor_transposon=420,
        strand_genome=-1,
        strand_transposon=-1,
        flank_genome=-62,
        flank_transposon=100,
        support_junction=4,
        support_spanning=3,
        metadata=frozendict({}))

    assert observed == [expected_fusion]
def tr_fusion():
    """Return an example transposon fusion with gene/feature metadata."""
    annotation = frozendict({
        'gene_strand': 1,
        'gene_name': 'Cblb',
        'feature_type': 'SA',
        'feature_strand': -1,
        'feature_name': 'En2SA'
    })

    return TransposonFusion(
        seqname='16',
        anchor_genome=52141095,
        anchor_transposon=1541,
        strand_genome=-1,
        strand_transposon=1,
        flank_genome=-78,
        flank_transposon=-76,
        support_junction=380,
        support_spanning=118,
        metadata=annotation)
def test_from_fusion(self, fusion):
    """Tests conversion of a fusion via TransposonFusion.from_fusion."""
    converted = TransposonFusion.from_fusion(fusion, 'T2onc')

    reference = TransposonFusion(
        seqname='1',
        anchor_genome=300,
        anchor_transposon=420,
        strand_genome=1,
        strand_transposon=1,
        flank_genome=-52,
        flank_transposon=62,
        support_junction=2,
        support_spanning=1,
        metadata=frozendict({}))

    assert converted == reference

    # Metadata must be a frozendict, not just an equal mapping.
    assert isinstance(converted.metadata, frozendict)
def tr_fusion():
    """Return an example annotated transposon fusion on seqname '16'.

    The metadata holds gene (``gene_strand``, ``gene_name``) and transposon
    feature (``feature_type``, ``feature_strand``, ``feature_name``) entries.
    """
    fields = dict(
        seqname='16',
        anchor_genome=52141095,
        anchor_transposon=1541,
        strand_genome=-1,
        strand_transposon=1,
        flank_genome=-78,
        flank_transposon=-76,
        support_junction=380,
        support_spanning=118)

    fields['metadata'] = frozendict({
        'gene_strand': 1,
        'gene_name': 'Cblb',
        'feature_type': 'SA',
        'feature_strand': -1,
        'feature_name': 'En2SA'
    })

    return TransposonFusion(**fields)
def test_from_fusion(self, fusion):
    """Tests from_fusion: checks both the values and the metadata type."""
    expected_fields = dict(
        seqname='1',
        anchor_genome=300,
        anchor_transposon=420,
        strand_genome=1,
        strand_transposon=1,
        flank_genome=-52,
        flank_transposon=62,
        support_junction=2,
        support_spanning=1,
        metadata=frozendict({}))

    result = TransposonFusion.from_fusion(fusion, 'T2onc')

    assert result == TransposonFusion(**expected_fields)
    assert isinstance(result.metadata, frozendict)
def annotate_fusions_for_transposon(fusions, feature_path):
    # type: (Iterable[Fusion], pathlib.Path) -> Iterable[Fusion]
    """Annotates fusions with transposon features overlapped by the fusion.

    Each fusion is yielded once per overlapping feature, with the feature's
    name, type and strand merged into its metadata. Fusions that overlap no
    feature are yielded unchanged.

    Parameters
    ----------
    fusions : iterable[TransposonFusion]
        Fusions to annotate.
    feature_path : str or pathlib.Path
        Path to TSV file containing transposon features.

    Yields
    ------
    Fusion
        Fusions, annotated with transposon features.

    """

    # Index the features by their (start, end) interval.
    features = TransposonFeature.from_csv(feature_path, sep='\t')
    feature_tree = IntervalTree.from_tuples(
        (feat.start, feat.end, feat) for feat in features)

    for fusion in fusions:
        start, end = fusion.transposon_region

        # Third element of each interval is the stored feature.
        hits = [interval[2] for interval in feature_tree[start:end]]

        if not hits:
            yield fusion
            continue

        for feature in hits:
            feature_meta = {
                'feature_name': feature.name,
                'feature_type': feature.type,
                'feature_strand': feature.strand
            }
            combined = toolz.merge(fusion.metadata, feature_meta)
            yield fusion._replace(metadata=frozendict(combined))
def annotate_fusions_for_transposon(fusions, feature_path):
    # type: (Iterable[Fusion], pathlib.Path) -> Iterable[Fusion]
    """Annotates fusions with transposon features overlapped by the fusion.

    Features are read from ``feature_path`` and looked up by the fusion's
    ``transposon_region``. A fusion overlapping several features results in
    one annotated copy per feature; a fusion without overlap is passed
    through as-is.

    Parameters
    ----------
    fusions : iterable[TransposonFusion]
        Fusions to annotate.
    feature_path : str or pathlib.Path
        Path to TSV file containing transposon features.

    Yields
    ------
    Fusion
        Fusions, annotated with transposon features.

    """

    def _overlapping_features(interval_tree, region):
        # Returns the features stored on intervals overlapping the region.
        begin, stop = region
        return [entry[2] for entry in interval_tree[begin:stop]]

    def _annotate(fusion, feature):
        # Returns a copy of the fusion with the feature's fields merged in.
        feature_meta = {
            'feature_name': feature.name,
            'feature_type': feature.type,
            'feature_strand': feature.strand
        }
        merged = toolz.merge(fusion.metadata, feature_meta)
        return fusion._replace(metadata=frozendict(merged))

    # Build the feature tree.
    features = TransposonFeature.from_csv(feature_path, sep='\t')
    tree = IntervalTree.from_tuples(
        (feat.start, feat.end, feat) for feat in features)

    for fusion in fusions:
        matches = _overlapping_features(tree, fusion.transposon_region)

        if len(matches) > 0:
            for feature in matches:
                yield _annotate(fusion, feature)
        else:
            yield fusion
def test_example_file(self, chimeric_junctions_path):
    """Test fusions extracted from the example chimeric junctions file."""

    # Read the junctions and extract the (unique) 'T2onc' fusions.
    junctions = star.read_chimeric_junctions(chimeric_junctions_path)
    extracted = set(star.extract_transposon_fusions(junctions, 'T2onc'))

    # The example file should give seven distinct fusions.
    assert len(extracted) == 7

    # One known fusion must be among them.
    key_fusion = TransposonFusion(
        seqname='16',
        anchor_genome=52141095,
        anchor_transposon=1541,
        strand_genome=-1,
        strand_transposon=1,
        flank_genome=-78,
        flank_transposon=-76,
        support_junction=380,
        support_spanning=118,
        metadata=frozendict({}))

    assert key_fusion in extracted