def test_transcript_fasta_only():
    """Exercise a Genome that was built from transcript FASTA files only.

    After indexing, the transcript FASTA dictionary must contain two
    entries.  Gene/transcript annotation queries and the protein sequence
    lookup are each expected to raise ``ValueError``; the captured context
    is handed to the ``no_gtf_`` / ``no_protein_`` helpers for further
    checks (those helpers are defined outside this block).
    """
    genome = Genome(
        reference_name="GRCm38",
        annotation_name="_test_mouse_ensembl81_subset",
        transcript_fasta_paths_or_urls=[
            MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH,
        ],
    )
    genome.index()

    eq_(2, len(genome.transcript_sequences.fasta_dictionary))

    # Each query is wrapped in a lambda so that both the attribute lookup
    # and the call happen inside the assert_raises context.
    annotation_queries = (
        lambda: genome.genes(),
        lambda: genome.gene_ids(),
        lambda: genome.gene_ids_of_gene_name("test"),
        lambda: genome.transcript_names(),
    )
    for run_query in annotation_queries:
        with assert_raises(ValueError) as cm:
            run_query()
        no_gtf_(cm)

    with assert_raises(ValueError) as cm:
        genome.protein_sequence("test")
    no_protein_(cm)
def test_transcript_fasta_only():
    """Check a Genome configured with a single transcript FASTA path.

    The genome is indexed and its transcript FASTA dictionary must hold
    two entries.  Every annotation-backed query below, as well as the
    protein sequence lookup, must raise ``ValueError``; the raised context
    is then passed to ``no_gtf_`` or ``no_protein_`` (defined outside this
    block) for additional verification.
    """
    fasta_only_genome = Genome(
        reference_name="GRCm38",
        annotation_name="_test_mouse_ensembl81_subset",
        transcript_fasta_path_or_url=(
            MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH),
    )
    fasta_only_genome.index()

    n_transcripts = len(fasta_only_genome.transcript_sequences.fasta_dictionary)
    eq_(2, n_transcripts)

    # Annotation queries: all are expected to fail with ValueError.
    with assert_raises(ValueError) as raised:
        fasta_only_genome.genes()
    no_gtf_(raised)

    with assert_raises(ValueError) as raised:
        fasta_only_genome.gene_ids()
    no_gtf_(raised)

    with assert_raises(ValueError) as raised:
        fasta_only_genome.gene_ids_of_gene_name("test")
    no_gtf_(raised)

    with assert_raises(ValueError) as raised:
        fasta_only_genome.transcript_names()
    no_gtf_(raised)

    # Protein sequence lookup: also expected to fail with ValueError.
    with assert_raises(ValueError) as raised:
        fasta_only_genome.protein_sequence("test")
    no_protein_(raised)
def get_gene_ids_of_transcript_id(transcript_id: str,
                                  ensembl: pyensembl.Genome,
                                  raise_on_error: bool = False):
    """ Extract all gene ids associated with the given transcript.

    The transcript is first mapped to its gene name, and then every gene
    id registered under that gene name is collected.

    Parameters
    ----------
    transcript_id: string
        The transcript identifier

    ensembl: pyensembl.Genome
        The annotations

    raise_on_error: bool
        Whether to raise an exception if a lookup in the annotations
        database fails. When False, a warning is logged instead.

    Returns
    -------
    transcript_gene_ids: list of dict, or None
        One record per gene id found, each with the keys:
            transcript_id
            gene_id

        None is returned when a lookup raises ValueError and
        `raise_on_error` is False.

    Raises
    ------
    ValueError
        If a lookup raises ValueError and `raise_on_error` is True. The
        original exception is chained as the cause.
    """
    try:
        gene_name = ensembl.gene_name_of_transcript_id(transcript_id)
        gene_ids = ensembl.gene_ids_of_gene_name(gene_name)
    except ValueError as ve:
        msg = ("[pyensembl_utils.get_gene_ids_of_transcript_id]: could not "
               "find transcript id in database: {}".format(transcript_id))

        if raise_on_error:
            raise ValueError(msg) from ve

        # Best-effort mode: report the problem and signal "no result".
        logger.warning(msg)
        return None

    return [
        {'transcript_id': transcript_id, 'gene_id': gene_id}
        for gene_id in gene_ids
    ]