Python EnsemblRelease.transcript_ids Examples

Programming Language: Python

Namespace/Package Name: pyensembl

Class/Type: EnsemblRelease

Method/Function: transcript_ids

Examples at hotexamples.com: 4

Python EnsemblRelease.transcript_ids - 4 examples found. These are the top-rated real-world Python examples of pyensembl.EnsemblRelease.transcript_ids, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

EnsemblRelease(30)

download(8)

index(8)

gene_names_at_locus(7)

transcript_by_id(6)

gene_by_id(6)

genes_by_name(3)

gene_ids_of_gene_name(3)

transcript_ids_of_gene_id(2)

transcript_ids(2)

genes_at_locus(2)

genes(2)

transcripts_by_name(2)

exon_ids_of_transcript_id(2)

exon_by_id(2)

cache_directory_path(1)

gene_names(1)

gene_name_of_gene_id(1)

gene_ids(1)

gene_id_of_protein_id(1)

exons_at_locus(1)

protein_sequence(1)

exon_ids_of_gene_name(1)

cached(1)

transcript_ids_of_gene_name(1)

gene_name_of_transcript_id(1)

Example #1

Show file

File: util.py Project: barryhicks/varcode

def random_variants(count,
                    ensembl_release=MAX_ENSEMBL_RELEASE,
                    deletions=True,
                    insertions=True,
                    random_seed=None):
    """
    Generate a VariantCollection with random variants that overlap
    at least one complete coding transcript.

    Parameters
    ----------
    count : int
        Number of variants to generate; sampling continues until this
        many variants have been collected.
    ensembl_release : int
        Release number passed to EnsemblRelease.
    deletions : bool
        If true, the empty string is one of the candidate alt alleles.
    insertions : bool
        If true, every two-letter combination of STANDARD_NUCLEOTIDES is
        one of the candidate alt alleles.
    random_seed : optional
        Seed handed to random.Random; the same seed reproduces the same
        sequence of random draws.

    Returns a VariantCollection built from the generated variants.
    """
    rng = random.Random(random_seed)
    ensembl = EnsemblRelease(ensembl_release)

    # The list of transcript IDs is memoized per release in the
    # module-level _transcript_ids_cache.
    if ensembl_release in _transcript_ids_cache:
        transcript_ids = _transcript_ids_cache[ensembl_release]
    else:
        transcript_ids = ensembl.transcript_ids()
        _transcript_ids_cache[ensembl_release] = transcript_ids

    # The two-nucleotide alt alleles do not depend on the sampled
    # position, so build them once instead of on every iteration.
    if insertions:
        nucleotide_pairs = [
            x + y for x in STANDARD_NUCLEOTIDES
            for y in STANDARD_NUCLEOTIDES
        ]
    else:
        nucleotide_pairs = []

    variants = []

    # NOTE(review): there is no cap on the number of attempts, so this
    # loop never terminates if no sampled transcript ever qualifies.
    while len(variants) < count:
        transcript_id = rng.choice(transcript_ids)
        transcript = ensembl.transcript_by_id(transcript_id)

        if not transcript.complete:
            continue

        exon = rng.choice(transcript.exons)
        # randint is inclusive on both ends
        base1_genomic_position = rng.randint(exon.start, exon.end)
        transcript_offset = transcript.spliced_offset(base1_genomic_position)

        try:
            seq = transcript.sequence
        except ValueError as e:
            # logging.warn is a deprecated alias that was removed in
            # Python 3.13; logging.warning is the supported spelling.
            logging.warning(e)
            # can't get sequence for non-coding transcripts
            continue

        ref = str(seq[transcript_offset])
        # presumably converts the transcript-strand base back to the
        # forward genomic strand -- confirm against pyensembl
        if transcript.on_backward_strand:
            ref = reverse_complement(ref)

        # Candidate order (substitutions, insertions, deletion) is kept
        # stable so that a given seed always yields the same variants.
        alt_nucleotides = [x for x in STANDARD_NUCLEOTIDES if x != ref]
        alt_nucleotides.extend(nucleotide_pairs)
        if deletions:
            alt_nucleotides.append("")
        alt = rng.choice(alt_nucleotides)
        variant = Variant(transcript.contig,
                          base1_genomic_position,
                          ref=ref,
                          alt=alt,
                          ensembl=ensembl)
        variants.append(variant)
    return VariantCollection(variants)

Example #2

Show file

File: util.py Project: vreuter/varcode

def random_variants(count,
                    ensembl_release=MAX_ENSEMBL_RELEASE,
                    deletions=True,
                    insertions=True,
                    random_seed=None):
    """
    Generate a VariantCollection with random variants that overlap
    at least one complete coding transcript.

    Parameters
    ----------
    count : int
        Number of variants to generate. Zero yields an empty collection.
    ensembl_release : int
        Release number passed to EnsemblRelease.
    deletions : bool
        If true, the empty string is one of the candidate alt alleles.
    insertions : bool
        If true, every two-letter combination of STANDARD_NUCLEOTIDES is
        one of the candidate alt alleles.
    random_seed : optional
        Seed handed to random.Random; the same seed reproduces the same
        sequence of random draws.

    Returns a VariantCollection built from the generated variants.

    Raises ValueError if `count` is negative or if fewer than `count`
    variants were produced within `count * 100` sampling attempts.
    """
    if count < 0:
        raise ValueError(
            "count must be non-negative, got %d" % count)

    rng = random.Random(random_seed)
    ensembl = EnsemblRelease(ensembl_release)

    # The list of transcript IDs is memoized per release in the
    # module-level _transcript_ids_cache.
    if ensembl_release in _transcript_ids_cache:
        transcript_ids = _transcript_ids_cache[ensembl_release]
    else:
        transcript_ids = ensembl.transcript_ids()
        _transcript_ids_cache[ensembl_release] = transcript_ids

    # The two-nucleotide alt alleles do not depend on the sampled
    # position, so build them once instead of on every iteration.
    if insertions:
        nucleotide_pairs = [
            x + y for x in STANDARD_NUCLEOTIDES
            for y in STANDARD_NUCLEOTIDES
        ]
    else:
        nucleotide_pairs = []

    variants = []

    # we should finish way before this loop is over but just in case
    # something is wrong with PyEnsembl we want to avoid an infinite loop
    for _ in range(count * 100):
        if len(variants) >= count:
            break

        transcript_id = rng.choice(transcript_ids)
        transcript = ensembl.transcript_by_id(transcript_id)

        if not transcript.complete:
            continue

        exon = rng.choice(transcript.exons)
        # randint is inclusive on both ends
        base1_genomic_position = rng.randint(exon.start, exon.end)
        transcript_offset = transcript.spliced_offset(
            base1_genomic_position)
        seq = transcript.sequence

        ref = str(seq[transcript_offset])
        # presumably converts the transcript-strand base back to the
        # forward genomic strand -- confirm against pyensembl
        if transcript.on_backward_strand:
            ref = reverse_complement(ref)

        # Candidate order (substitutions, insertions, deletion) is kept
        # stable so that a given seed always yields the same variants.
        alt_nucleotides = [x for x in STANDARD_NUCLEOTIDES if x != ref]
        alt_nucleotides.extend(nucleotide_pairs)
        if deletions:
            alt_nucleotides.append("")
        alt = rng.choice(alt_nucleotides)
        variant = Variant(transcript.contig,
                          base1_genomic_position,
                          ref=ref,
                          alt=alt,
                          ensembl=ensembl)
        variants.append(variant)

    # Decide success from the result rather than from where the loop
    # stopped: this way count == 0 returns an empty collection and a run
    # that succeeds on its very last attempt is not reported as a failure.
    if len(variants) < count:
        raise ValueError(("Unable to generate %d random variants, "
                          "there may be a problem with PyEnsembl") % count)
    return VariantCollection(variants)

Example #3

Show file

File: util.py Project: barryhicks/varcode

def random_variants(
        count,
        ensembl_release=MAX_ENSEMBL_RELEASE,
        deletions=True,
        insertions=True,
        random_seed=None):
    """
    Generate a VariantCollection with random variants that overlap
    at least one complete coding transcript.

    Parameters
    ----------
    count : int
        Number of variants to generate; sampling continues until this
        many variants have been collected.
    ensembl_release : int
        Release number passed to EnsemblRelease.
    deletions : bool
        If true, the empty string is one of the candidate alt alleles.
    insertions : bool
        If true, every two-letter combination of STANDARD_NUCLEOTIDES is
        one of the candidate alt alleles.
    random_seed : optional
        Seed handed to random.Random; the same seed reproduces the same
        sequence of random draws.

    Returns a VariantCollection built from the generated variants.
    """
    rng = random.Random(random_seed)
    ensembl = EnsemblRelease(ensembl_release)

    # Transcript IDs are looked up once per release and then reused via
    # the module-level _transcript_ids_cache.
    if ensembl_release in _transcript_ids_cache:
        transcript_ids = _transcript_ids_cache[ensembl_release]
    else:
        transcript_ids = ensembl.transcript_ids()
        _transcript_ids_cache[ensembl_release] = transcript_ids

    # Loop-invariant: the two-nucleotide alt alleles are the same for
    # every sampled position, so compute them a single time.
    nucleotide_pairs = []
    if insertions:
        nucleotide_pairs = [
            x + y
            for x in STANDARD_NUCLEOTIDES
            for y in STANDARD_NUCLEOTIDES
        ]

    variants = []

    # NOTE(review): the number of attempts is unbounded; if no sampled
    # transcript ever qualifies this loop does not terminate.
    while len(variants) < count:
        transcript_id = rng.choice(transcript_ids)
        transcript = ensembl.transcript_by_id(transcript_id)

        if not transcript.complete:
            continue

        exon = rng.choice(transcript.exons)
        # randint includes both exon.start and exon.end
        base1_genomic_position = rng.randint(exon.start, exon.end)
        transcript_offset = transcript.spliced_offset(base1_genomic_position)

        try:
            seq = transcript.sequence
        except ValueError as e:
            # Use logging.warning: the logging.warn alias is deprecated
            # and no longer exists as of Python 3.13.
            logging.warning(e)
            # can't get sequence for non-coding transcripts
            continue

        ref = str(seq[transcript_offset])
        # presumably maps the transcript-strand base onto the forward
        # genomic strand -- confirm against pyensembl
        if transcript.on_backward_strand:
            ref = reverse_complement(ref)

        # Keep the candidate order (substitutions, insertions, deletion)
        # fixed so seeded runs stay reproducible.
        alt_nucleotides = [x for x in STANDARD_NUCLEOTIDES if x != ref]
        alt_nucleotides.extend(nucleotide_pairs)
        if deletions:
            alt_nucleotides.append("")
        alt = rng.choice(alt_nucleotides)
        variant = Variant(
            transcript.contig,
            base1_genomic_position,
            ref=ref,
            alt=alt,
            ensembl=ensembl)
        variants.append(variant)
    return VariantCollection(variants)

Example #4

Show file

File: util.py Project: Al3n70rn/varcode

def random_variants(
        count,
        ensembl_release=MAX_ENSEMBL_RELEASE,
        deletions=True,
        insertions=True,
        random_seed=None):
    """
    Generate a VariantCollection with random variants that overlap
    at least one complete coding transcript.

    Parameters
    ----------
    count : int
        Number of variants to generate. Zero yields an empty collection.
    ensembl_release : int
        Release number passed to EnsemblRelease.
    deletions : bool
        If true, the empty string is one of the candidate alt alleles.
    insertions : bool
        If true, every two-letter combination of STANDARD_NUCLEOTIDES is
        one of the candidate alt alleles.
    random_seed : optional
        Seed handed to random.Random; the same seed reproduces the same
        sequence of random draws.

    Returns a VariantCollection built from the generated variants.

    Raises ValueError if `count` is negative or if `count * 100`
    sampling attempts were used up before `count` variants were produced.
    """
    if count < 0:
        raise ValueError(
            "count must be non-negative, got %d" % count)

    rng = random.Random(random_seed)
    ensembl = EnsemblRelease(ensembl_release)

    # Transcript IDs are looked up once per release and then reused via
    # the module-level _transcript_ids_cache.
    if ensembl_release in _transcript_ids_cache:
        transcript_ids = _transcript_ids_cache[ensembl_release]
    else:
        transcript_ids = ensembl.transcript_ids()
        _transcript_ids_cache[ensembl_release] = transcript_ids

    # Loop-invariant: the two-nucleotide alt alleles are the same for
    # every sampled position, so compute them a single time.
    nucleotide_pairs = []
    if insertions:
        nucleotide_pairs = [
            x + y
            for x in STANDARD_NUCLEOTIDES
            for y in STANDARD_NUCLEOTIDES
        ]

    variants = []

    # we should finish way before the attempt budget is spent but just in
    # case something is wrong with PyEnsembl we want to avoid an infinite
    # loop
    max_attempts = count * 100
    attempts = 0

    # Testing the variant count first means count == 0 returns an empty
    # collection, and a run that fills the quota on its final permitted
    # attempt returns normally instead of raising.
    while len(variants) < count:
        if attempts >= max_attempts:
            raise ValueError(
                ("Unable to generate %d random variants, "
                 "there may be a problem with PyEnsembl") % count)
        attempts += 1

        transcript_id = rng.choice(transcript_ids)
        transcript = ensembl.transcript_by_id(transcript_id)

        if not transcript.complete:
            continue

        exon = rng.choice(transcript.exons)
        # randint includes both exon.start and exon.end
        base1_genomic_position = rng.randint(exon.start, exon.end)
        transcript_offset = transcript.spliced_offset(base1_genomic_position)
        seq = transcript.sequence

        ref = str(seq[transcript_offset])
        # presumably maps the transcript-strand base onto the forward
        # genomic strand -- confirm against pyensembl
        if transcript.on_backward_strand:
            ref = reverse_complement(ref)

        # Keep the candidate order (substitutions, insertions, deletion)
        # fixed so seeded runs stay reproducible.
        alt_nucleotides = [x for x in STANDARD_NUCLEOTIDES if x != ref]
        alt_nucleotides.extend(nucleotide_pairs)
        if deletions:
            alt_nucleotides.append("")
        alt = rng.choice(alt_nucleotides)
        variant = Variant(
            transcript.contig,
            base1_genomic_position,
            ref=ref,
            alt=alt,
            ensembl=ensembl)
        variants.append(variant)
    return VariantCollection(variants)