def gene_obj():
    """Return a HgncGene object describing the gene B3GALT6 in build 38"""
    gene_kwargs = dict(
        hgnc_symbol="B3GALT6",
        hgnc_id=17978,
        ensembl_id="ENSG00000176022",
        chrom="1",
        start=1232237,
        end=1235041,
        build="38",
    )
    return HgncGene(**gene_kwargs)
def test_insert_gene(adapter, parsed_gene):
    """Load one gene and check it is returned by all_genes() but not for build '38'"""
    ## GIVEN an empty adapter
    genes_before = adapter.all_genes().count()
    assert genes_before == 0

    ## WHEN inserting a gene
    adapter.load_hgnc_gene(HgncGene(**parsed_gene))

    ## THEN assert that the gene is there
    genes_after = adapter.all_genes().count()
    assert genes_after == 1

    ## THEN assert that no genes are in the '38' build
    genes_in_38 = adapter.all_genes(build='38').count()
    assert genes_in_38 == 0
def test_insert_many_genes(adapter, parsed_gene):
    """Bulk load 300 genes and check all are stored and none show up for build '38'"""
    nr_genes = 300

    ## GIVEN an empty adapter
    assert adapter.all_genes().count() == 0

    ## WHEN inserting a bulk of genes that differ only in their hgnc_id
    # Each gene is built from a copy so the parsed_gene fixture dict is not
    # modified as a side effect of this test
    gene_objs = [
        HgncGene(**dict(parsed_gene, hgnc_id=hgnc_id))
        for hgnc_id in range(nr_genes)
    ]
    adapter.load_hgnc_bulk(gene_objs)

    ## THEN assert that the genes are loaded
    assert adapter.all_genes().count() == nr_genes

    ## THEN assert that no genes are in the '38' build
    assert adapter.all_genes(build='38').count() == 0
def build_hgnc_gene(gene_info, build="37"):
    """Build a hgnc_gene object

    The required fields (hgnc_id, hgnc_symbol, ensembl_gene_id, chromosome,
    start, end) are validated first; optional fields are only copied when
    they hold a truthy value. Keys that are still None at the end are removed.

    Args:
        gene_info(dict): Gene information
        build(str): Genome build handed to HgncGene, defaults to '37'

    Returns:
        gene_obj(HgncGene): dict-like gene object, documented layout:

        {
            '_id': ObjectId(),
            # This is the hgnc id, required:
            'hgnc_id': int,
            # The primary symbol, required
            'hgnc_symbol': str,
            'ensembl_id': str, # required
            'build': str, # '37' or '38', defaults to '37', required

            'chromosome': str, # required
            'start': int, # required
            'end': int, # required

            'description': str, # Gene description
            'aliases': list(), # Gene symbol aliases, includes hgnc_symbol, str
            'entrez_id': int,
            'omim_id': int,
            'pli_score': float,
            'primary_transcripts': list(), # List of refseq transcripts (str)
            'ucsc_id': str,
            'uniprot_ids': list(), # List of str
            'vega_id': str,
            'transcripts': list(), # List of hgnc_transcript

            # Inheritance information
            'inheritance_models': list(), # List of model names
            'incomplete_penetrance': bool, # Acquired from HPO

            # Phenotype information
            'phenotypes': list(), # List of dictionaries with phenotype information
        }

    Raises:
        KeyError: If a required field is missing from gene_info
        ValueError: If hgnc_id, start or end holds a value that is not integer-like
        TypeError: If hgnc_id, start or end holds a type int() can not convert
    """
    # int() raises ValueError for e.g. "abc" and TypeError for e.g. None.
    # Both are handled for every integer field so the caller always gets the
    # descriptive message, while the exception type stays what int() raised.
    try:
        hgnc_id = int(gene_info["hgnc_id"])
    except KeyError as err:
        raise KeyError("Gene has to have a hgnc_id") from err
    except ValueError as err:
        raise ValueError("hgnc_id has to be integer") from err
    except TypeError as err:
        raise TypeError("hgnc_id has to be integer") from err

    try:
        hgnc_symbol = gene_info["hgnc_symbol"]
    except KeyError as err:
        raise KeyError("Gene has to have a hgnc_symbol") from err

    try:
        # The input key is 'ensembl_gene_id', the gene object uses 'ensembl_id'
        ensembl_id = gene_info["ensembl_gene_id"]
    except KeyError as err:
        raise KeyError("Gene has to have a ensembl_id") from err

    try:
        chromosome = gene_info["chromosome"]
    except KeyError as err:
        raise KeyError("Gene has to have a chromosome") from err

    try:
        start = int(gene_info["start"])
    except KeyError as err:
        raise KeyError("Gene has to have a start position") from err
    except ValueError as err:
        raise ValueError("Gene start has to be a integer") from err
    except TypeError as err:
        raise TypeError("Gene start has to be a integer") from err

    try:
        end = int(gene_info["end"])
    except KeyError as err:
        raise KeyError("Gene has to have a end position") from err
    except ValueError as err:
        raise ValueError("Gene end has to be a integer") from err
    except TypeError as err:
        raise TypeError("Gene end has to be a integer") from err

    gene_obj = HgncGene(
        hgnc_id=hgnc_id,
        hgnc_symbol=hgnc_symbol,
        ensembl_id=ensembl_id,
        chrom=chromosome,
        start=start,
        end=end,
        build=build,
    )

    # (key in gene_info, key in gene_obj, converter or None for a plain copy)
    # NOTE(review): values are only copied when truthy, so a pli_score of
    # exactly 0.0 is skipped -- confirm that this is intended
    optional_fields = (
        ("description", "description", None),
        ("previous_symbols", "aliases", None),
        ("entrez_id", "entrez_id", int),
        ("omim_id", "omim_id", int),
        ("pli_score", "pli_score", float),
        ("ref_seq", "primary_transcripts", None),
        ("ucsc_id", "ucsc_id", None),
        ("uniprot_ids", "uniprot_ids", None),
        ("vega_id", "vega_id", None),
    )
    for source_key, target_key, convert in optional_fields:
        value = gene_info.get(source_key)
        if not value:
            continue
        gene_obj[target_key] = convert(value) if convert else value

    # Any truthy value marks the gene as having incomplete penetrance
    if gene_info.get("incomplete_penetrance"):
        gene_obj["incomplete_penetrance"] = True

    if gene_info.get("inheritance_models"):
        gene_obj["inheritance_models"] = gene_info["inheritance_models"]

    phenotype_objs = [
        build_phenotype(phenotype_info)
        for phenotype_info in gene_info.get("phenotypes", [])
    ]
    if phenotype_objs:
        gene_obj["phenotypes"] = phenotype_objs

    # Drop every key that never got a value; iterate over a copy of the keys
    # since entries are removed during the loop
    for key in list(gene_obj):
        if gene_obj[key] is None:
            gene_obj.pop(key)

    return gene_obj
def build_hgnc_gene(gene_info, build='37'):
    """Build a hgnc_gene object

    Required fields are read and validated one by one, after that the
    optional fields are added when gene_info holds a truthy value for them.
    Finally all keys whose value is None are removed from the gene object.

    Args:
        gene_info(dict): Gene information
        build(str): Genome build handed to HgncGene, defaults to '37'

    Returns:
        gene_obj(HgncGene): dict-like gene object, documented layout:

        {
            '_id': ObjectId(),
            # This is the hgnc id, required:
            'hgnc_id': int,
            # The primary symbol, required
            'hgnc_symbol': str,
            'ensembl_id': str, # required
            'build': str, # '37' or '38', defaults to '37', required

            'chromosome': str, # required
            'start': int, # required
            'end': int, # required

            'description': str, # Gene description
            'aliases': list(), # Gene symbol aliases, includes hgnc_symbol, str
            'entrez_id': int,
            'omim_id': int,
            'pli_score': float,
            'primary_transcripts': list(), # List of refseq transcripts (str)
            'ucsc_id': str,
            'uniprot_ids': list(), # List of str
            'vega_id': str,
            'transcripts': list(), # List of hgnc_transcript

            # Inheritance information
            'inheritance_models': list(), # List of model names
            'incomplete_penetrance': bool, # Acquired from HPO

            # Phenotype information
            'phenotypes': list(), # List of dictionaries with phenotype information
        }

    Raises:
        KeyError: If a required field is missing from gene_info
        ValueError: If hgnc_id, start or end holds a value that is not integer-like
        TypeError: If hgnc_id, start or end holds a type int() can not convert
    """
    # int() signals a bad string with ValueError and an unsupported type
    # (such as None) with TypeError. Every integer field handles both, keeping
    # the exception type but replacing the message with a descriptive one.
    try:
        hgnc_id = int(gene_info['hgnc_id'])
    except KeyError as err:
        raise KeyError("Gene has to have a hgnc_id") from err
    except ValueError as err:
        raise ValueError("hgnc_id has to be integer") from err
    except TypeError as err:
        raise TypeError("hgnc_id has to be integer") from err

    try:
        hgnc_symbol = gene_info['hgnc_symbol']
    except KeyError as err:
        raise KeyError("Gene has to have a hgnc_symbol") from err

    try:
        # Stored as 'ensembl_gene_id' in gene_info, passed on as ensembl_id
        ensembl_id = gene_info['ensembl_gene_id']
    except KeyError as err:
        raise KeyError("Gene has to have a ensembl_id") from err

    try:
        chromosome = gene_info['chromosome']
    except KeyError as err:
        raise KeyError("Gene has to have a chromosome") from err

    try:
        start = int(gene_info['start'])
    except KeyError as err:
        raise KeyError("Gene has to have a start position") from err
    except ValueError as err:
        raise ValueError("Gene start has to be a integer") from err
    except TypeError as err:
        raise TypeError("Gene start has to be a integer") from err

    try:
        end = int(gene_info['end'])
    except KeyError as err:
        raise KeyError("Gene has to have a end position") from err
    except ValueError as err:
        raise ValueError("Gene end has to be a integer") from err
    except TypeError as err:
        raise TypeError("Gene end has to be a integer") from err

    gene_obj = HgncGene(
        hgnc_id=hgnc_id,
        hgnc_symbol=hgnc_symbol,
        ensembl_id=ensembl_id,
        chrom=chromosome,
        start=start,
        end=end,
        build=build,
    )

    # Optional fields: only set when gene_info holds a truthy value
    if gene_info.get('description'):
        gene_obj['description'] = gene_info['description']

    # Previous symbols are stored as the aliases of the gene
    if gene_info.get('previous_symbols'):
        gene_obj['aliases'] = gene_info['previous_symbols']

    if gene_info.get('entrez_id'):
        gene_obj['entrez_id'] = int(gene_info['entrez_id'])

    if gene_info.get('omim_id'):
        gene_obj['omim_id'] = int(gene_info['omim_id'])

    # NOTE(review): a pli_score of exactly 0.0 is falsy and therefore skipped
    # here -- confirm that this is intended
    if gene_info.get('pli_score'):
        gene_obj['pli_score'] = float(gene_info['pli_score'])

    # Refseq identifiers are stored as the primary transcripts
    if gene_info.get('ref_seq'):
        gene_obj['primary_transcripts'] = gene_info['ref_seq']

    if gene_info.get('ucsc_id'):
        gene_obj['ucsc_id'] = gene_info['ucsc_id']

    if gene_info.get('uniprot_ids'):
        gene_obj['uniprot_ids'] = gene_info['uniprot_ids']

    if gene_info.get('vega_id'):
        gene_obj['vega_id'] = gene_info['vega_id']

    # Any truthy value marks the gene as having incomplete penetrance
    if gene_info.get('incomplete_penetrance'):
        gene_obj['incomplete_penetrance'] = True

    if gene_info.get('inheritance_models'):
        gene_obj['inheritance_models'] = gene_info['inheritance_models']

    phenotype_objs = [
        build_phenotype(phenotype_info)
        for phenotype_info in gene_info.get('phenotypes', [])
    ]
    if phenotype_objs:
        gene_obj['phenotypes'] = phenotype_objs

    # Remove keys without a value; a copy of the keys is iterated since the
    # object is modified inside the loop
    for key in list(gene_obj):
        if gene_obj[key] is None:
            gene_obj.pop(key)

    return gene_obj