Beispiel #1
0
def parse_transcript(gene_obj, tx_obj, build=None):
    """Decorate a variant gene transcript (VEP) in place.

    Links are always added to the transcript through ``add_tx_links``. When the
    transcript carries a truthy ``refseq_id`` a ``change_str`` entry is added as
    well, built by ``transcript_str``.

    Args:
        gene_obj(dict): variant gene; ``gene_obj['common']`` is read and, when it
            is falsy, ``gene_obj['hgnc_id']`` is used as the gene name instead
        tx_obj(dict): the transcript to update
        build: genome build forwarded to ``add_tx_links``, 37 when not given
    """
    if not build:
        build = 37
    add_tx_links(tx_obj, build)

    # Only transcripts with a refseq identifier get a change string
    if not tx_obj.get('refseq_id'):
        return

    common_info = gene_obj['common']
    if common_info:
        gene_name = common_info['hgnc_symbol']
    else:
        # No common gene information available, use the hgnc id as name
        gene_name = gene_obj['hgnc_id']

    tx_obj['change_str'] = transcript_str(tx_obj, gene_name)
Beispiel #2
0
def gene(store, hgnc_id):
    """Parse information about a gene.

    The gene is looked up with ``store.hgnc_gene`` once per genome build
    ('37' and '38'). Every record that is found is decorated in place with a
    position string, links and per transcript/phenotype extras, and its
    summary fields are copied to the result. When both builds are found the
    summary fields reflect the build that was processed last, while
    ``res['record']`` keeps the first record found.

    Args:
        store: object exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: identifier of the gene to look up

    Returns:
        dict: the collected gene information; ``res['builds']`` maps each
            build to its record (or None)

    Raises:
        ValueError: if the gene was not found in any of the builds
    """
    res = {
        'builds': {
            '37': None,
            '38': None
        },
        'symbol': None,
        'description': None,
        'ensembl_id': None,
        'record': None
    }

    for build in res['builds']:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record['position'] = (
            "{this[chromosome]}:{this[start]}-{this[end]}".format(this=record))
        res['aliases'] = record['aliases']
        res['hgnc_id'] = record['hgnc_id']
        res['description'] = record['description']
        res['builds'][build] = record
        res['symbol'] = record['hgnc_symbol']
        res['entrez_id'] = record.get('entrez_id')
        res['pli_score'] = record.get('pli_score')

        add_gene_links(record, int(build))

        res['omim_id'] = record.get('omim_id')
        res['incomplete_penetrance'] = record.get('incomplete_penetrance',
                                                  False)
        res['inheritance_models'] = record.get('inheritance_models', [])
        for transcript in record['transcripts']:
            transcript['position'] = (
                "{this[chrom]}:{this[start]}-{this[end]}".format(
                    this=transcript))
            # NOTE(review): build is passed as a string here but as an int to
            # add_gene_links above - confirm which one add_tx_links expects
            add_tx_links(transcript, build)

        for phenotype in record.get('phenotypes', []):
            phenotype['omim_link'] = omim(phenotype.get('mim_number'))

        # Keep the first record that was found as the main record
        if not res['record']:
            res['record'] = record

    # If none of the genes were found. Testing any(res.values()) can not be
    # used for this since res['builds'] is a non-empty dict and always truthy.
    if not res['record']:
        raise ValueError(
            "Gene {0} was not found in any genome build".format(hgnc_id))

    return res
Beispiel #3
0
def gene(store, hgnc_id):
    """Parse information about a gene.

    The gene is fetched with ``store.hgnc_gene`` for genome builds "37" and
    "38". Each record found is updated in place (position string, links,
    transcript positions, phenotype OMIM links) and its summary fields are
    copied to the result, so with two hits the summary comes from the build
    processed last. ``res["record"]`` holds the first record found.

    Args:
        store: object exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: identifier of the gene to look up

    Returns:
        dict: gene information, with ``res["builds"]`` mapping every build to
            its record or None

    Raises:
        ValueError: if no record exists for the gene in any build
    """
    res = {
        "builds": {
            "37": None,
            "38": None
        },
        "symbol": None,
        "description": None,
        "ensembl_id": None,
        "record": None,
    }

    for build in res["builds"]:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record["position"] = (
            "{this[chromosome]}:{this[start]}-{this[end]}".format(this=record))
        res["aliases"] = record["aliases"]
        res["hgnc_id"] = record["hgnc_id"]
        res["description"] = record["description"]
        res["builds"][build] = record
        res["symbol"] = record["hgnc_symbol"]
        res["entrez_id"] = record.get("entrez_id")
        res["pli_score"] = record.get("pli_score")

        add_gene_links(record, int(build))

        res["omim_id"] = record.get("omim_id")
        res["incomplete_penetrance"] = record.get("incomplete_penetrance",
                                                  False)
        res["inheritance_models"] = record.get("inheritance_models", [])
        for transcript in record["transcripts"]:
            transcript["position"] = (
                "{this[chrom]}:{this[start]}-{this[end]}".format(
                    this=transcript))
            # NOTE(review): the build is a string here while add_gene_links
            # gets an int - confirm which type add_tx_links expects
            add_tx_links(transcript, build)

        for phenotype in record.get("phenotypes", []):
            phenotype["omim_link"] = omim(phenotype.get("mim_number"))

        # The first record found becomes the main record
        if not res["record"]:
            res["record"] = record

    # If none of the genes were found. any(res.values()) is always True here
    # because res["builds"] is a non-empty dict, so check the record instead.
    if not res["record"]:
        raise ValueError(
            "Gene {0} was not found in any genome build".format(hgnc_id))

    return res
Beispiel #4
0
def gene(store, hgnc_id):
    """Parse information about a gene.

    Looks the gene up through ``store.hgnc_gene`` in builds '37' and '38'.
    Records that are found get a position string and links added in place,
    and their summary fields are copied to the result (the build handled last
    wins when both exist). The first record found is kept as ``res['record']``.

    Args:
        store: object exposing ``hgnc_gene(hgnc_id, build=...)``
        hgnc_id: identifier of the gene to look up

    Returns:
        dict: gene information; ``res['builds']`` maps build -> record or None

    Raises:
        ValueError: if the gene could not be found in any build
    """
    res = {
        'builds': {'37': None, '38': None},
        'symbol': None,
        'description': None,
        'ensembl_id': None,
        'record': None,
    }

    for build in res['builds']:
        record = store.hgnc_gene(hgnc_id, build=build)
        if not record:
            continue

        record['position'] = "{this[chromosome]}:{this[start]}-{this[end]}".format(this=record)
        res['aliases'] = record['aliases']
        res['hgnc_id'] = record['hgnc_id']
        res['description'] = record['description']
        res['builds'][build] = record
        res['symbol'] = record['hgnc_symbol']
        res['entrez_id'] = record.get('entrez_id')
        res['pli_score'] = record.get('pli_score')

        add_gene_links(record, int(build))

        res['omim_id'] = record.get('omim_id')
        res['incomplete_penetrance'] = record.get('incomplete_penetrance', False)
        res['inheritance_models'] = record.get('inheritance_models', [])
        for transcript in record['transcripts']:
            transcript['position'] = ("{this[chrom]}:{this[start]}-{this[end]}"
                                      .format(this=transcript))
            # NOTE(review): str build here, int build for add_gene_links -
            # confirm what add_tx_links expects
            add_tx_links(transcript, build)

        for phenotype in record.get('phenotypes', []):
            phenotype['omim_link'] = omim(phenotype.get('mim_number'))

        # Remember the first record that was found
        if not res['record']:
            res['record'] = record

    # If none of the genes were found. The previous any(res.values()) test
    # never failed since the non-empty res['builds'] dict is always truthy.
    if not res['record']:
        raise ValueError("Gene {0} was not found in any genome build".format(hgnc_id))

    return res
Beispiel #5
0
def update_transcripts_information(variant_gene,
                                   hgnc_gene,
                                   variant_obj,
                                   genome_build=None):
    """Merge hgnc transcript information into the transcripts of a variant gene.

    The hgnc information is continuously updated, so this has to run every
    time a variant is fetched.

    All updates are made in place:
        - ``hgnc_gene["transcripts_dict"]`` maps ensembl transcript id -> hgnc transcript
        - variant transcripts known to the hgnc gene get links, and refseq
          information when the hgnc transcript has a refseq id
        - ``variant_gene["primary_transcripts"]`` lists the variant transcripts
          that got a refseq id
        - ``variant_obj["has_refseq"]`` is set to True if any such transcript exists

    Args:
        variant_gene(dict): the gene information from the variant
        hgnc_gene(dict): the hgnc gene information
        variant_obj(scout.models.Variant): the variant that owns the gene
        genome_build(str): forwarded to ``add_tx_links``, "37" when not given

    """
    if not genome_build:
        genome_build = "37"
    disease_refseq_ids = variant_gene.get("disease_associated_no_version",
                                          set())

    # Index the hgnc transcripts on ensembl transcript id and expose the
    # index on the gene object
    tx_by_ensembl_id = {
        hgnc_tx["ensembl_transcript_id"]: hgnc_tx
        for hgnc_tx in hgnc_gene.get("transcripts", [])
    }
    hgnc_gene["transcripts_dict"] = tx_by_ensembl_id
    gene_symbol = hgnc_gene["hgnc_symbol"]

    with_refseq = []
    for variant_tx in variant_gene.get("transcripts", []):
        known_tx = tx_by_ensembl_id.get(variant_tx["transcript_id"])
        if not known_tx:
            # The transcript is not among the hgnc gene transcripts, skip it
            continue

        # Primary status comes from the hgnc transcript
        if known_tx.get("is_primary"):
            variant_tx["is_primary"] = True

        add_tx_links(variant_tx, genome_build)

        refseq_id = known_tx.get("refseq_id")
        if refseq_id:
            variant_tx["refseq_id"] = refseq_id
            variant_obj["has_refseq"] = True
            with_refseq.append(variant_tx)

            if refseq_id in disease_refseq_ids:
                variant_tx["is_disease_associated"] = True

            # One ensembl transcript can have several refseq identifiers,
            # keep all of them
            variant_tx["refseq_identifiers"] = known_tx.get(
                "refseq_identifiers", [])

            variant_tx["change_str"] = transcript_str(variant_tx, gene_symbol)

    variant_gene["primary_transcripts"] = with_refseq