def test_parse_panel_lines():
    """Parse a panel made of metadata lines, a header line and one gene row."""
    ## GIVEN an iterable with panel lines: metadata, header and a single gene
    metadata = [
        "##panel_id=panel1",
        "##institute=cust000",
        "##version=1.0",
        "##date=2016-12-09",
        "##display_name=Test panel",
    ]
    header = (
        "#hgnc_id\thgnc_symbol\tdisease_associated_transcripts"
        "\treduced_penetrance\tgenetic_disease_models\tmosaicism"
        "\tdatabase_entry_version\toriginal_hgnc"
    )
    gene_rows = ["7481\tMT-TF\t\t\t\t\t\tMT-TF\n"]
    panel_lines = metadata + [header] + gene_rows

    # Every line that does not start with "#" is expected to yield one gene
    expected_count = sum(1 for row in panel_lines if not row.startswith("#"))

    ## WHEN parsing the panel of genes
    parsed = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(parsed) == expected_count

    ## THEN assert that every parsed gene carries a symbol and an id
    for entry in parsed:
        assert entry["hgnc_symbol"]
        assert entry["hgnc_id"]
def new_panel(store, institute_id, panel_name, display_name, csv_lines):
    """Build a gene panel from CSV lines and save it through ``store``.

    A message is flashed and None is returned when the institute is not
    found, when a panel named ``panel_name`` already exists, or when
    ``parse_genes`` raises a SyntaxError. Otherwise the value returned by
    ``store.add_gene_panel`` is handed back to the caller.
    """
    owner = store.institute(institute_id)
    if owner is None:
        flash("{}: institute not found".format(institute_id))
        return None

    existing = store.gene_panel(panel_name)
    if existing:
        message = "panel already exists: {} - {}".format(
            existing['panel_name'], existing['display_name'])
        flash(message)
        return None

    log.debug("parse genes from CSV input")
    try:
        parsed_genes = parse_genes(csv_lines)
    except SyntaxError as error:
        # The first exception argument is shown to the user
        flash(error.args[0], 'danger')
        return None

    log.debug("build new gene panel")
    panel_info = {
        'panel_name': panel_name,
        'institute': owner['_id'],
        'version': 1.0,
        'date': dt.datetime.now(),
        'display_name': display_name,
        'genes': parsed_genes,
    }
    return store.add_gene_panel(build_panel(panel_info, store))
def test_parse_panel_lines():
    """Check that a full panel (metadata + header + gene) is parsed."""
    ## GIVEN an iterable with panel lines
    column_header = (
        "#hgnc_id\thgnc_symbol\tdisease_associated_transcripts\t"
        "reduced_penetrance\tgenetic_disease_models\tmosaicism\t"
        "database_entry_version\toriginal_hgnc"
    )
    panel_lines = [
        "##panel_id=panel1",
        "##institute=cust000",
        "##version=1.0",
        "##date=2016-12-09",
        "##display_name=Test panel",
        column_header,
        "7481\tMT-TF\t\t\t\t\t\tMT-TF\n",
    ]
    # Lines starting with '#' are metadata/header, the rest are gene rows
    gene_rows = [row for row in panel_lines if not row.startswith('#')]
    nr_expected = len(gene_rows)

    ## WHEN parsing the panel of genes
    result = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(result) == nr_expected

    ## THEN assert that each gene has both a symbol and an id
    for parsed_gene in result:
        assert parsed_gene['hgnc_symbol']
        assert parsed_gene['hgnc_id']
def convert(panel):
    """Convert a gene panel with hgnc symbols to a new one with hgnc ids.

    The genes parsed from ``panel`` are passed to ``add_hgnc_id`` on the
    module-level ``store``. A tab-separated header line is echoed first,
    followed by one line for every gene that has a truthy ``hgnc_id``.
    """
    adapter = store
    columns = [
        "hgnc_id",
        "hgnc_symbol",
        "disease_associated_transcripts",
        "reduced_penetrance",
        "genetic_disease_models",
        "mosaicism",
        "database_entry_version",
    ]
    parsed_genes = parse_genes(panel)
    adapter.add_hgnc_id(parsed_genes)

    click.echo("#{0}".format("\t".join(columns)))
    for entry in parsed_genes:
        # Genes without an hgnc id are left out of the output
        if not entry.get("hgnc_id"):
            continue
        # Missing or falsy values are written as empty fields
        fields = [str(entry[column]) if entry.get(column) else "" for column in columns]
        click.echo("\t".join(fields))
def update_panel(store, panel_name, csv_lines, option):
    """Update an existing gene panel with genes.

    Pending actions are registered through ``store.add_pending``; the panel
    document itself is not modified here.

    Args:
        store(scout.adapter.MongoAdapter)
        panel_name(str)
        csv_lines(iterable(str)): Stream with genes
        option(str): 'add' or 'replace'

    Returns:
        panel_obj(dict): the panel fetched from the store, or None when the
            panel is not found or when parse_genes raises a SyntaxError
    """
    panel_obj = store.gene_panel(panel_name)
    if panel_obj is None:
        return None
    try:
        # a list of gene dictionaries containing gene info
        new_genes = parse_genes(csv_lines)
    except SyntaxError as error:
        flash(error.args[0], 'danger')
        return None

    # if existing genes are to be replaced by those in csv_lines
    if option == 'replace':
        # all existing genes should be deleted
        for gene in panel_obj['genes']:
            # create extra key to use in pending actions:
            gene['hgnc_symbol'] = gene['symbol']
            store.add_pending(panel_obj, gene, action='delete', info=None)

    # hgnc ids already in the panel. Built at most once, on first use, instead
    # of once per new gene. Nothing visible in this loop changes
    # panel_obj['genes']; assumes store.add_pending leaves it untouched
    # -- TODO confirm.
    existing_genes = None

    for new_gene in new_genes:
        if not new_gene['hgnc_id']:
            flash("gene missing hgnc id: {}".format(new_gene['hgnc_symbol']),'danger')
            continue
        gene_obj = store.hgnc_gene(new_gene['hgnc_id'])
        if gene_obj is None:
            flash("gene not found: {} - {}".format(new_gene['hgnc_id'], new_gene['hgnc_symbol']),'danger')
            continue
        if new_gene['hgnc_symbol'] and gene_obj['hgnc_symbol'] != new_gene['hgnc_symbol']:
            # Only a warning: the gene is still added/edited below
            flash("symbol mis-match: {0} | {1}".format(
                gene_obj['hgnc_symbol'], new_gene['hgnc_symbol']), 'warning')

        info_data = {
            'disease_associated_transcripts': new_gene['transcripts'],
            'reduced_penetrance': new_gene['reduced_penetrance'],
            'mosaicism': new_gene['mosaicism'],
            'inheritance_models': new_gene['inheritance_models'],
            'database_entry_version': new_gene['database_entry_version'],
        }
        if option == 'replace':
            # every existing gene has a pending delete, so this is always an add
            action = 'add'
        else:
            # add option. Add if the gene is not existing, otherwise edit it
            if existing_genes is None:
                existing_genes = {gene['hgnc_id'] for gene in panel_obj['genes']}
            action = 'edit' if gene_obj['hgnc_id'] in existing_genes else 'add'
        store.add_pending(panel_obj, gene_obj, action=action, info=info_data)

    return panel_obj
def test_parse_panel_lines_excel_export_empty_line():
    """Check that only the two gene rows of a ';' separated export are parsed."""
    ## GIVEN an iterable with a header, two genes, a bare delimiter and an empty line
    header = "hgnc_id;hgnc_symbol"
    gene_rows = ["13666;AAAS", "16262;YAP1"]
    trailing_rows = [";", ""]
    panel_lines = [header] + gene_rows + trailing_rows
    expected_count = 2

    ## WHEN parsing the panel of genes
    parsed = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(parsed) == expected_count
def new_panel(store, institute_id, panel_name, display_name, csv_lines, description=None):
    """Create a new gene panel from CSV lines.

    Args:
        store(scout.adapter.MongoAdapter)
        institute_id(str)
        panel_name(str)
        display_name(str)
        csv_lines(iterable(str)): Stream with genes
        description(str)

    Returns:
        panel_id: whatever store.add_gene_panel returns, or None when the
            institute is not found, the panel already exists, the genes
            could not be parsed, or building/saving the panel failed
    """
    owner = store.institute(institute_id)
    if owner is None:
        flash("{}: institute not found".format(institute_id))
        return None

    existing = store.gene_panel(panel_name)
    if existing:
        message = "panel already exists: {} - {}".format(
            existing['panel_name'], existing['display_name'])
        flash(message)
        return None

    log.debug("parse genes from CSV input")
    try:
        parsed_genes = parse_genes(csv_lines)
    except SyntaxError as error:
        flash(error.args[0], 'danger')
        return None

    log.debug("build new gene panel")
    panel_id = None
    try:
        panel_info = {
            'panel_name': panel_name,
            'institute': owner['_id'],
            'version': 1.0,
            'date': dt.datetime.now(),
            'display_name': display_name,
            'description': description,
            'genes': parsed_genes,
        }
        panel_id = store.add_gene_panel(build_panel(panel_info, store))
    except Exception as err:
        # Any failure is logged and leaves panel_id as None
        log.error('An error occurred while adding the gene panel {}'.format(err))

    return panel_id
def test_parse_panel_doublette():
    """Check that a gene listed twice in the panel lines is only parsed once."""
    ## GIVEN an iterable with panel lines where one gene occurs twice
    gene_line = "7481\tMT-TF\t\t\t\t\t\tMT-TF\n"
    panel_lines = [
        "#hgnc_id\thgnc_symbol\tdisease_associated_transcripts\treduced_penet"
        "rance\tgenetic_disease_models\tmosaicism\tdatabase_entry_version\tor"
        "iginal_hgnc",
        # The two gene rows must be separate list items. Adjacent string
        # literals without a comma are concatenated into a single element,
        # which would leave only one (malformed) gene row in the input.
        gene_line,
        gene_line,
    ]

    ## WHEN parsing the panel of genes
    genes = parse_genes(panel_lines)

    ## THEN the gene is only occurring once
    assert len(genes) == 1
def new_panel(store, institute_id, panel_name, display_name, csv_lines):
    """Create a new gene panel from CSV lines.

    Args:
        store(scout.adapter.MongoAdapter)
        institute_id(str)
        panel_name(str)
        display_name(str)
        csv_lines(iterable(str)): Stream with genes

    Returns:
        panel_id: whatever store.add_gene_panel returns, or None when the
            institute is not found, the panel already exists, the genes
            could not be parsed, or building/saving the panel failed
    """
    institute = store.institute(institute_id)
    if institute is None:
        flash("{}: institute not found".format(institute_id))
        return None

    duplicate = store.gene_panel(panel_name)
    if duplicate:
        flash("panel already exists: {} - {}".format(
            duplicate['panel_name'], duplicate['display_name']))
        return None

    log.debug("parse genes from CSV input")
    try:
        gene_list = parse_genes(csv_lines)
    except SyntaxError as error:
        flash(error.args[0], 'danger')
        return None

    log.debug("build new gene panel")
    panel_id = None
    try:
        raw_panel = {
            'panel_name': panel_name,
            'institute': institute['_id'],
            'version': 1.0,
            'date': dt.datetime.now(),
            'display_name': display_name,
            'genes': gene_list,
        }
        built_panel = build_panel(raw_panel, store)
        panel_id = store.add_gene_panel(built_panel)
    except Exception as err:
        # Any failure is logged and leaves panel_id as None
        log.error('An error occurred while adding the gene panel {}'.format(err))

    return panel_id
def test_parse_panel_doublette():
    """Check that a gene listed twice in the panel lines is only parsed once."""
    ## GIVEN an iterable with panel lines where one gene occurs twice
    panel_lines = [
        "#hgnc_id\thgnc_symbol\tdisease_associated_transcripts\treduced_penet"
        "rance\tgenetic_disease_models\tmosaicism\tdatabase_entry_version\tor"
        "iginal_hgnc",
        # A comma is required between the two rows: adjacent string literals
        # are otherwise joined into one list element and the duplicate is
        # never presented to the parser as a second line.
        "7481\tMT-TF\t\t\t\t\t\tMT-TF\n",
        "7481\tMT-TF\t\t\t\t\t\tMT-TF\n",
    ]

    ## WHEN parsing the panel of genes
    genes = parse_genes(panel_lines)

    ## THEN the gene is only occurring once
    assert len(genes) == 1
def test_parse_minimal_panel_lines_symbol():
    """Parse a panel whose only line is a bare gene symbol."""
    ## GIVEN an iterable with a single line holding only a symbol
    panel_lines = ["ADK"]
    expected_count = sum(1 for row in panel_lines if not row.startswith("#"))

    ## WHEN parsing the panel of genes
    parsed = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(parsed) == expected_count

    ## THEN assert that the gene has the symbol but no hgnc id
    for entry in parsed:
        assert entry.get("hgnc_id") is None
        assert entry.get("hgnc_symbol") == "ADK"
def test_parse_panel_lines_modified_excel_export():
    """Parse a ';' separated export whose header names have been altered."""
    ## GIVEN an iterable with a modified header line and one gene row
    header = "HGNC_IDnumber;HGNC_symbol"
    panel_lines = [header, "13666;AAAS"]
    expected_count = 1

    ## WHEN parsing the panel of genes
    parsed = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(parsed) == expected_count

    ## THEN assert that the gene has the expected id and symbol
    for entry in parsed:
        assert entry.get("hgnc_id") == 13666
        assert entry.get("hgnc_symbol") == "AAAS"
def test_parse_minimal_panel_lines_id():
    """Parse a panel whose only line is a bare hgnc id."""
    ## GIVEN an iterable with a single line holding only an hgnc id
    panel_lines = ["1"]
    nr_genes = len([line for line in panel_lines if not line.startswith('#')])

    ## WHEN parsing the panel of genes
    genes = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(genes) == nr_genes

    ## THEN assert that the gene has the id but no symbol
    for gene in genes:
        assert gene['hgnc_id'] == 1
        # Identity check: None is a singleton and should be compared with "is"
        assert gene.get('hgnc_symbol') is None
def test_parse_panel_lines_excel_export_empty_line():
    """Check that delimiter-only and empty lines do not produce genes."""
    ## GIVEN an iterable with panel lines ending in a ";" line and an empty line
    valid_rows = ["13666;AAAS", "16262;YAP1"]
    panel_lines = ["hgnc_id;hgnc_symbol"]
    panel_lines.extend(valid_rows)
    panel_lines.extend([";", ""])
    nr_expected = 2

    ## WHEN parsing the panel of genes
    result = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(result) == nr_expected
def test_parse_minimal_panel_lines_symbol():
    """Parse a panel whose only line is a bare gene symbol."""
    ## GIVEN an iterable with a single line holding only a symbol
    panel_lines = ["ADK"]
    nr_genes = len([line for line in panel_lines if not line.startswith('#')])

    ## WHEN parsing the panel of genes
    genes = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(genes) == nr_genes

    ## THEN assert that the gene has the symbol but no hgnc id
    for gene in genes:
        # Identity check: None is a singleton and should be compared with "is"
        assert gene.get('hgnc_id') is None
        assert gene.get('hgnc_symbol') == 'ADK'
def test_parse_panel_lines_modified_excel_export():
    """Parse a ';' separated export with non-standard header names."""
    ## GIVEN an iterable with panel lines where the header names are modified
    gene_row = "13666;AAAS"
    panel_lines = ["HGNC_IDnumber;HGNC_symbol", gene_row]
    nr_expected = 1

    ## WHEN parsing the panel of genes
    result = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(result) == nr_expected

    ## THEN assert that the parsed gene has the expected id and symbol
    for parsed_gene in result:
        assert parsed_gene.get('hgnc_id') == 13666
        assert parsed_gene.get('hgnc_symbol') == 'AAAS'
def test_parse_panel_lines_excel_export():
    """Parse a ';' separated export with a header line and two genes."""
    ## GIVEN an iterable with a header line and two gene rows
    panel_lines = ["hgnc_id;hgnc_symbol", "13666;AAAS", "16262;YAP1"]
    expected_genes = [(13666, "AAAS"), (16262, "YAP1")]
    expected_count = 2

    ## WHEN parsing the panel of genes
    parsed = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(parsed) == expected_count

    ## THEN assert that the genes come back in order with the right id and symbol
    for position, (hgnc_id, symbol) in enumerate(expected_genes):
        entry = parsed[position]
        assert entry.get("hgnc_id") == hgnc_id
        assert entry.get("hgnc_symbol") == symbol
def test_parse_panel_genes(panel_1_file):
    """Parse a gene panel file and compare against its non-comment line count."""
    # GIVEN a gene panel file, count the lines that do not start with "#"
    with open(panel_1_file, "r") as handle:
        expected_count = sum(1 for row in handle if not row.startswith("#"))

    # WHEN parsing the panel of genes
    panel_handle = get_file_handle(panel_1_file)
    parsed = parse_genes(panel_handle)

    # THEN assert that all genes from the file have been parsed
    assert len(parsed) == expected_count

    # THEN assert that every gene in the panel has a symbol and an id
    for entry in parsed:
        assert entry["hgnc_symbol"]
        assert entry["hgnc_id"]
def convert(context, panel):
    """Convert a gene panel with hgnc symbols to a new one with hgnc ids.

    The adapter is taken from ``context.obj['adapter']``. A tab-separated
    header line is echoed first, followed by one line for every parsed gene
    that has a truthy ``hgnc_id``.
    """
    adapter = context.obj['adapter']
    columns = [
        "hgnc_id",
        "hgnc_symbol",
        "disease_associated_transcripts",
        "reduced_penetrance",
        "genetic_disease_models",
        "mosaicism",
        "database_entry_version",
    ]
    gene_list = parse_genes(panel)
    adapter.add_hgnc_id(gene_list)

    click.echo("#{0}".format('\t'.join(columns)))
    for gene in gene_list:
        # Genes without an hgnc id are left out of the output
        if not gene.get('hgnc_id'):
            continue
        # Missing or falsy values are written as empty fields
        row = [str(gene[column]) if gene.get(column) else '' for column in columns]
        click.echo('\t'.join(row))
def test_parse_panel_genes(panel_1_file):
    """Check that every non-comment line of the panel file yields a gene."""
    # GIVEN a gene panel file
    with open(panel_1_file, 'r') as infile:
        gene_rows = [row for row in infile if not row.startswith('#')]
    nr_expected = len(gene_rows)

    # WHEN parsing the panel of genes
    source = get_file_handle(panel_1_file)
    result = parse_genes(source)

    # THEN assert that all genes from the file have been parsed
    assert len(result) == nr_expected

    # THEN assert that each gene in the panel has a symbol and an id
    for parsed_gene in result:
        assert parsed_gene['hgnc_symbol']
        assert parsed_gene['hgnc_id']
def update_panel(store, panel_name, csv_lines):
    """Register pending add/edit actions on an existing gene panel.

    Returns the panel fetched from ``store``, or None when the panel is not
    found or when ``parse_genes`` raises a SyntaxError. Genes without an
    hgnc id, or whose id is unknown to the store, are reported with a flash
    message and skipped.
    """
    panel_obj = store.gene_panel(panel_name)
    if panel_obj is None:
        return None

    # hgnc ids already present in the panel decide between 'edit' and 'add'
    known_ids = set()
    for panel_gene in panel_obj['genes']:
        known_ids.add(panel_gene['hgnc_id'])

    try:
        parsed_genes = parse_genes(csv_lines)
    except SyntaxError as error:
        flash(error.args[0], 'danger')
        return None

    for candidate in parsed_genes:
        if not candidate['hgnc_id']:
            flash("gene missing hgnc id: {}".format(candidate['hgnc_symbol']), 'danger')
            continue

        stored_gene = store.hgnc_gene(candidate['hgnc_id'])
        if stored_gene is None:
            not_found = "gene not found: {} - {}".format(
                candidate['hgnc_id'], candidate['hgnc_symbol'])
            flash(not_found, 'danger')
            continue

        if candidate['hgnc_symbol']:
            if stored_gene['hgnc_symbol'] != candidate['hgnc_symbol']:
                # Only a warning: the gene is still added/edited below
                mismatch = "symbol mis-match: {} | {}".format(
                    stored_gene['hgnc_symbol'], candidate['hgnc_symbol'])
                flash(mismatch, 'warning')

        if stored_gene['hgnc_id'] in known_ids:
            action = 'edit'
        else:
            action = 'add'

        gene_info = {
            'disease_associated_transcripts': candidate['transcripts'],
            'reduced_penetrance': candidate['reduced_penetrance'],
            'mosaicism': candidate['mosaicism'],
            'inheritance_models': candidate['inheritance_models'],
            'database_entry_version': candidate['database_entry_version'],
        }
        store.add_pending(panel_obj, stored_gene, action=action, info=gene_info)

    return panel_obj
def test_parse_panel_lines_excel_export():
    """Check ids and symbols parsed from a two-gene ';' separated export."""
    ## GIVEN an iterable with panel lines
    header = "hgnc_id;hgnc_symbol"
    panel_lines = [header, "13666;AAAS", "16262;YAP1"]
    nr_expected = 2

    ## WHEN parsing the panel of genes
    result = parse_genes(panel_lines)

    ## THEN assert that all genes from the lines have been parsed
    assert len(result) == nr_expected

    ## THEN assert that both genes are present with the expected values
    first = result[0]
    assert (first.get('hgnc_id'), first.get('hgnc_symbol')) == (13666, 'AAAS')

    second = result[1]
    assert (second.get('hgnc_id'), second.get('hgnc_symbol')) == (16262, 'YAP1')
def new_panel(
    store,
    institute_id,
    panel_name,
    display_name,
    csv_lines,
    maintainer=None,
    description=None,
):
    """Create a new gene panel from CSV lines.

    Args:
        store(scout.adapter.MongoAdapter)
        institute_id(str)
        panel_name(str)
        display_name(str)
        csv_lines(iterable(str)): Stream with genes
        maintainer(list(user._id))
        description(str)

    Returns:
        panel_id: whatever store.add_gene_panel returns, or None when the
            institute is not found, the panel already exists, the genes
            could not be parsed, or building/saving the panel failed
    """
    owner = store.institute(institute_id)
    if owner is None:
        flash("{}: institute not found".format(institute_id))
        return None

    existing = store.gene_panel(panel_name)
    if existing:
        message = "panel already exists: {} - {}".format(
            existing["panel_name"], existing["display_name"]
        )
        flash(message, "danger")
        return None

    LOG.debug("parse genes from CSV input")
    try:
        parsed_genes = parse_genes(csv_lines)
    except SyntaxError as error:
        flash(error.args[0], "danger")
        LOG.debug("Ooops!")
        return None

    LOG.debug("build new gene panel")
    panel_id = None
    try:
        panel_info = {
            "panel_name": panel_name,
            "institute": owner["_id"],
            "version": 1.0,
            "maintainer": maintainer,
            "date": dt.datetime.now(),
            "display_name": display_name,
            "description": description,
            "genes": parsed_genes,
            "hidden": False,
        }
        panel_id = store.add_gene_panel(build_panel(panel_info, store))
    except Exception as err:
        # Any failure is shown to the user and leaves panel_id as None
        flash(str(err), "danger")

    return panel_id
def update_panel(store, panel_name, csv_lines, option):
    """Update an existing gene panel with genes.

    Pending actions are registered through ``store.add_pending``; the panel
    document itself is not modified here.

    Args:
        store(scout.adapter.MongoAdapter)
        panel_name(str)
        csv_lines(iterable(str)): Stream with genes
        option(str): 'add' or 'replace'

    Returns:
        panel_obj(dict): the panel fetched from the store, or None when the
            panel is not found or when parse_genes raises a SyntaxError
    """
    panel_obj = store.gene_panel(panel_name)
    if panel_obj is None:
        return None
    try:
        # a list of gene dictionaries containing gene info
        new_genes = parse_genes(csv_lines)
    except SyntaxError as error:
        flash(error.args[0], "danger")
        return None

    # if existing genes are to be replaced by those in csv_lines
    if option == "replace":
        # all existing genes should be deleted
        for gene in panel_obj["genes"]:
            # create extra key to use in pending actions:
            gene["hgnc_symbol"] = gene["symbol"]
            store.add_pending(panel_obj, gene, action="delete", info=None)

    # hgnc ids already in the panel. Built at most once, on first use, instead
    # of once per new gene. Nothing visible in this loop changes
    # panel_obj["genes"]; assumes store.add_pending leaves it untouched
    # -- TODO confirm.
    existing_genes = None

    for new_gene in new_genes:
        if not new_gene["hgnc_id"]:
            flash("gene missing hgnc id: {}".format(new_gene["hgnc_symbol"]), "danger")
            continue
        gene_obj = store.hgnc_gene(new_gene["hgnc_id"])
        if gene_obj is None:
            flash(
                "gene not found: {} - {}".format(new_gene["hgnc_id"], new_gene["hgnc_symbol"]),
                "danger",
            )
            continue
        if new_gene["hgnc_symbol"] and gene_obj["hgnc_symbol"] != new_gene["hgnc_symbol"]:
            # Only a warning: the gene is still added/edited below
            flash(
                "symbol mis-match: {0} | {1}".format(
                    gene_obj["hgnc_symbol"], new_gene["hgnc_symbol"]
                ),
                "warning",
            )

        info_data = {
            "disease_associated_transcripts": new_gene["transcripts"],
            "reduced_penetrance": new_gene["reduced_penetrance"],
            "mosaicism": new_gene["mosaicism"],
            "inheritance_models": new_gene["inheritance_models"],
            "database_entry_version": new_gene["database_entry_version"],
        }
        if option == "replace":
            # every existing gene has a pending delete, so this is always an add
            action = "add"
        else:
            # add option. Add if the gene is not existing, otherwise edit it
            if existing_genes is None:
                existing_genes = {gene["hgnc_id"] for gene in panel_obj["genes"]}
            action = "edit" if gene_obj["hgnc_id"] in existing_genes else "add"
        store.add_pending(panel_obj, gene_obj, action=action, info=info_data)

    return panel_obj