def test_transcript_fasta_only():
    # Build a Genome from transcript FASTA input only: no GTF and no protein
    # FASTA arguments are passed to the constructor.
    fasta_paths = [MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH]
    genome = Genome(
        reference_name="GRCm38",
        annotation_name="_test_mouse_ensembl81_subset",
        transcript_fasta_paths_or_urls=fasta_paths)
    genome.index()

    # The test FASTA is expected to yield exactly two transcript sequences.
    eq_(2, len(genome.transcript_sequences.fasta_dictionary))

    # Each query below must raise ValueError; the captured context is handed
    # to no_gtf_ (helper defined elsewhere -- presumably it checks that the
    # error points at the missing GTF; confirm against its definition).
    gtf_queries = (
        ("genes", ()),
        ("gene_ids", ()),
        ("gene_ids_of_gene_name", ("test",)),
        ("transcript_names", ()),
    )
    for method_name, call_args in gtf_queries:
        with assert_raises(ValueError) as cm:
            getattr(genome, method_name)(*call_args)
        no_gtf_(cm)

    # Protein lookup must also raise ValueError; its context goes to
    # no_protein_ instead (also defined elsewhere).
    with assert_raises(ValueError) as cm:
        genome.protein_sequence("test")
    no_protein_(cm)
def test_transcript_fasta_only():
    # Build a Genome from a single transcript FASTA path only: no GTF and no
    # protein FASTA arguments are passed to the constructor.
    genome_kwargs = {
        "reference_name": "GRCm38",
        "annotation_name": "_test_mouse_ensembl81_subset",
        "transcript_fasta_path_or_url":
            MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH,
    }
    genome = Genome(**genome_kwargs)
    genome.index()

    # The test FASTA is expected to yield exactly two transcript sequences.
    eq_(2, len(genome.transcript_sequences.fasta_dictionary))

    # Each query below must raise ValueError; the captured context is handed
    # to no_gtf_ (helper defined elsewhere -- presumably it checks that the
    # error points at the missing GTF; confirm against its definition).
    gtf_queries = (
        ("genes", ()),
        ("gene_ids", ()),
        ("gene_ids_of_gene_name", ("test",)),
        ("transcript_names", ()),
    )
    for method_name, call_args in gtf_queries:
        with assert_raises(ValueError) as cm:
            getattr(genome, method_name)(*call_args)
        no_gtf_(cm)

    # Protein lookup must also raise ValueError; its context goes to
    # no_protein_ instead (also defined elsewhere).
    with assert_raises(ValueError) as cm:
        genome.protein_sequence("test")
    no_protein_(cm)
# Example no. 3
# 0
def get_gene_ids_of_transcript_id(transcript_id: str,
                                  ensembl: pyensembl.Genome,
                                  raise_on_error: bool = False):
    """ Map a transcript to every gene id registered under its gene name.

    The gene name of the transcript is looked up first; all gene ids that
    the annotations report for that name are then paired with the
    transcript id.

    Parameters
    ----------
    transcript_id: string
        The transcript identifier

    ensembl: pyensembl.Genome
        The annotations; queried through ``gene_name_of_transcript_id`` and
        ``gene_ids_of_gene_name``

    raise_on_error: bool
        Whether to raise an exception when either lookup fails with a
        ``ValueError``. When false, a warning is logged instead.

    Returns
    -------
    transcript_gene_ids: list of dict, or None
        One record per gene id, each with the keys:

            transcript_id
            gene_id

        ``None`` is returned when a lookup fails and ``raise_on_error`` is
        false.

    Raises
    ------
    ValueError
        If a lookup raises ``ValueError`` and ``raise_on_error`` is true.
        The original exception is chained as the cause.
    """
    try:
        gene_name = ensembl.gene_name_of_transcript_id(transcript_id)
        matching_gene_ids = ensembl.gene_ids_of_gene_name(gene_name)
    except ValueError as lookup_error:
        message = (
            "['pyensembl_utils.get_gene_ids_of_transcript_id]: could not "
            "find transcript id in database: {}".format(transcript_id))
        if raise_on_error:
            raise ValueError(message) from lookup_error

        # Best-effort mode: report the failed lookup and signal it with None.
        logger.warning(message)
        return None
    else:
        return [
            {'transcript_id': transcript_id, 'gene_id': gene_id}
            for gene_id in matching_gene_ids
        ]