def add_phenotype(self, ind_obj, phenotype_id):
    """Add the phenotype terms matching an HPO or OMIM id to an individual.

    The id is treated as an HPO term when it starts with ``'HP:'`` or is
    exactly 7 characters long; anything else is looked up as a disease
    (OMIM) id. Every resulting HPO term that the individual does not
    already carry is appended to ``ind_obj.phenotypes``.

    Args:
        ind_obj: individual whose ``phenotypes`` list is extended and whose
            ``cases`` get their HPO list refreshed.
        phenotype_id (str): HPO or OMIM identifier to look up.

    Returns:
        list or None: the newly added ``PhenotypeTerm`` objects (possibly
        empty when every term was already present), or ``None`` when the
        lookup produced no results at all.
    """
    if phenotype_id.startswith('HP:') or len(phenotype_id) == 7:
        logger.debug('querying on HPO term')
        hpo_results = phizz.query_hpo([phenotype_id])
    else:
        logger.debug('querying on OMIM term')
        hpo_results = phizz.query_disease([phenotype_id])

    # None (rather than []) tells the caller that the lookup matched nothing.
    added_terms = [] if hpo_results else None
    known_ids = set(term.phenotype_id for term in ind_obj.phenotypes)

    # `or ()` keeps a None result from crashing the loop.
    for result in hpo_results or ():
        hpo_id = result['hpo_term']
        if hpo_id in known_ids:
            continue
        term = PhenotypeTerm(phenotype_id=hpo_id,
                             description=result['description'])
        logger.info('adding new HPO term: %s', term.phenotype_id)
        ind_obj.phenotypes.append(term)
        added_terms.append(term)
        # Remember the id so a repeated entry in the same result set is not
        # appended a second time.
        known_ids.add(hpo_id)

    logger.debug('storing new HPO terms')
    self.save()

    # Only refresh the per-case HPO lists when something actually changed.
    if added_terms:
        for case_obj in ind_obj.cases:
            self.update_hpolist(case_obj)

    return added_terms
def get_hpo(variant_str):
    """Collect phenotype query results for the samples of a variant.

    Reads the UCLex sample sheet and, for every row whose ``sample`` is in
    ``get_samples(variant_str)``, looks up the row's ``phenotype`` with
    phizz: ids starting with ``'HP'`` are queried as HPO terms, ids starting
    with ``'MIM'`` as diseases. Other phenotypes are printed but not queried.

    Args:
        variant_str: variant identifier, passed unchanged to ``get_samples``.

    Returns:
        list: one element per queried row, each element being whatever the
        corresponding phizz query returned.
    """
    samples = get_samples(variant_str)
    hpo = []
    # The context manager closes the sample sheet even if a lookup raises;
    # the previous `file(...)` handle was never closed.
    with open('/data/uclex_data/UCLexInfo/uclex-samples.csv', 'r') as handle:
        for row in csv.DictReader(handle, delimiter=','):
            if row['sample'] not in samples:
                continue
            pheno = row['phenotype']
            print((row['sample'], pheno,))
            if pheno.startswith('HP'):
                hpo.append(phizz.query_hpo([pheno]))
            elif pheno.startswith('MIM'):
                hpo.append(phizz.query_disease([pheno]))
    return hpo
def phenotypes(variant):
    """Attach the HPO term ids of all carrier samples to ``variant``.

    For every sample listed under ``variant['HOM']`` and ``variant['HET']``
    the patient's phenotype ids are fetched from phenotips; ids starting
    with ``'HP'`` are resolved through ``phizz.query_hpo`` and ids starting
    with ``'MIM'`` through ``phizz.query_disease``. The collected records
    (plus any already stored under ``'HPO'``) are finally reduced to their
    ``'hpo_term'`` values.

    Returns the same ``variant`` mapping, with ``variant['HPO']`` set to a
    list of term ids (empty when nothing was found).
    """
    carriers = variant['HOM'] + variant['HET']
    for sample_id in carriers:
        for term_id in phenotips.patient_hpo(eid=sample_id, auth=args.login):
            if term_id.startswith('HP'):
                matches = phizz.query_hpo([term_id])
            elif term_id.startswith('MIM'):
                matches = phizz.query_disease([term_id])
            else:
                # Neither an HPO nor an OMIM id: nothing to look up.
                continue
            variant['HPO'] = variant.get('HPO', []) + matches

    # A missing 'HPO' key collapses to an empty list.
    variant['HPO'] = [entry['hpo_term'] for entry in variant.get('HPO', [])]
    return variant
def phenotypes(variant):
    """Populate ``variant['HPO']`` with HPO term ids of its carriers.

    Walks the samples in ``variant['HOM'] + variant['HET']``, asks phenotips
    for each sample's phenotype ids and resolves them with phizz (HPO query
    for ids beginning with ``'HP'``, disease query for ids beginning with
    ``'MIM'``; other ids are ignored). The gathered records are then
    replaced by their ``'hpo_term'`` values.

    The input mapping is modified in place and also returned.
    """
    for sample in variant['HOM'] + variant['HET']:
        patient_terms = phenotips.patient_hpo(eid=sample, auth=args.login)
        for code in patient_terms:
            is_hpo = code.startswith('HP')
            if not is_hpo and not code.startswith('MIM'):
                continue
            lookup = phizz.query_hpo if is_hpo else phizz.query_disease
            previous = variant.get('HPO', [])
            variant['HPO'] = previous + lookup([code])

    if 'HPO' not in variant:
        # No phenotype could be resolved for any carrier.
        variant['HPO'] = []
        return variant

    variant['HPO'] = [record['hpo_term'] for record in variant['HPO']]
    return variant
def add_phenotype(self, ind_obj, phenotype_id):
    """Add the phenotype terms matching an HPO or OMIM id to an individual.

    The id is treated as an HPO term when it starts with ``"HP:"`` or is
    exactly 7 characters long; anything else is looked up as a disease
    (OMIM) id. Every resulting HPO term that the individual does not
    already carry is appended to ``ind_obj.phenotypes``, after which the
    store is saved.

    Args:
        ind_obj: individual whose ``phenotypes`` list is extended.
        phenotype_id (str): HPO or OMIM identifier to look up.

    Returns:
        list: the newly added ``PhenotypeTerm`` objects; empty when the
        lookup matched nothing or every term was already present.
    """
    if phenotype_id.startswith("HP:") or len(phenotype_id) == 7:
        logger.debug("querying on HPO term")
        hpo_results = phizz.query_hpo([phenotype_id])
    else:
        logger.debug("querying on OMIM term")
        hpo_results = phizz.query_disease([phenotype_id])

    added_terms = []
    known_ids = set(term.phenotype_id for term in ind_obj.phenotypes)

    # `or ()` keeps a None result from crashing the loop.
    for result in hpo_results or ():
        hpo_id = result["hpo_term"]
        if hpo_id in known_ids:
            continue
        term = PhenotypeTerm(phenotype_id=hpo_id,
                             description=result["description"])
        logger.info("adding new HPO term: %s", term.phenotype_id)
        ind_obj.phenotypes.append(term)
        added_terms.append(term)
        # Remember the id so a repeated entry in the same result set is not
        # appended a second time.
        known_ids.add(hpo_id)

    logger.debug("storing new HPO terms")
    self.save()
    return added_terms
def add_phenotype(self, institute, case, user, link, hpo_term=None,
                  omim_term=None):
    """Add new phenotype terms to a case.

    Looks up the given HPO term (preferred when both are supplied) or OMIM
    term with phizz, appends a ``PhenotypeTerm`` to ``case.phenotype_terms``
    for every result, records one ``add_phenotype`` event per appended term,
    and finally saves the case.

    Args:
        institute (Institute): A Institute object
        case (Case): Case object
        user (User): A User object
        link (str): The url to be used in the event
        hpo_term (str): A hpo id
        omim_term (str): A omim id

    Raises:
        ValueError: if neither ``hpo_term`` nor ``omim_term`` is supplied.
            Exceptions raised by the phizz lookups propagate unchanged.
    """
    if hpo_term:
        logger.debug("Fetching info for hpo term %s", hpo_term)
        hpo_results = phizz.query_hpo([hpo_term])
    elif omim_term:
        logger.debug("Fetching info for mim term %s", omim_term)
        hpo_results = phizz.query_disease([omim_term])
    else:
        raise ValueError('Must supply either hpo or omim term')

    logger.debug("Got result %s",
                 ', '.join(res['hpo_term'] for res in hpo_results))

    for hpo_result in hpo_results:
        phenotype_name = hpo_result['hpo_term']
        phenotype_term = PhenotypeTerm(
            phenotype_id=phenotype_name,
            feature=hpo_result['description'],
        )
        logger.info("Append the phenotype term %s to case %s",
                    phenotype_name, case.display_name)
        case.phenotype_terms.append(phenotype_term)

        # One event per term, so each event's content names the term added.
        logger.info("Creating event for adding phenotype term for case %s",
                    case.display_name)
        self.create_event(
            institute=institute,
            case=case,
            user=user,
            link=link,
            category='case',
            verb='add_phenotype',
            subject=case.display_name,
            content=phenotype_name,
        )

    case.save()
    logger.debug("Case updated")
def get_awesomebar_result(db, query):
    """Resolve a submitted search-bar query to a (datatype, identifier) pair.

    Called after the user presses enter; works out whether the text refers
    to a phenotype, variant, gene, transcript or region.

    Resolution order (first match wins):
        1. ``HP:...``  -> ``('hpo', query)``
        2. ``MIM...``  -> ``('mim', query)``
        3. rsid known to the variant store -> ``('variant', variant_id)``,
           or ``('dbsnp_variant_set', rsid)`` when several variants share it
        4. rsid known to dbSNP -> ``('variant', variant_id)``
        5. gene symbol (as typed, then upper-cased) -> ``('gene', gene_id)``
        6. ``ENS...`` id -> ``('gene', gene_id)`` or
           ``('transcript', transcript_id)``
        7. region / variant-id patterns R1-R4 (optional ``CHR`` prefix
           removed) -> ``('region', ...)`` or ``('variant', ...)``;
           a range whose end precedes its start yields
           ``('region', 'invalid')``
        8. otherwise ``('not_found', query)``

    Only identifiers are returned, never whole objects, which keeps the
    lookup cheap for callers that merely redirect.

    Args:
        db: database handle passed through to the lookup helpers.
        query (str): raw text typed by the user.

    Returns:
        tuple: ``(datatype, identifier)`` as described above.
    """
    query = query.strip()
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print('Query: %s' % query)

    if query.startswith('HP:'):
        # NOTE(review): the lookup result is unused; the call is retained in
        # case callers rely on it raising for malformed ids - confirm.
        phizz.query_hpo([query])
        return 'hpo', query
    if query.startswith('MIM'):
        # NOTE(review): result unused, see the HPO branch.
        phizz.query_disease([query])
        return 'mim', query

    # Variant
    variant = orm.get_variants_by_rsid(db, query.lower())
    if variant:
        if len(variant) == 1:
            return 'variant', variant[0]['variant_id']
        return 'dbsnp_variant_set', variant[0]['rsid']
    variant = get_variants_from_dbsnp(db, query.lower())
    if variant:
        return 'variant', variant[0]['variant_id']
    # TODO - https://github.com/brettpthomas/exac_browser/issues/14

    gene = get_gene_by_name(db, query)
    if gene:
        return 'gene', gene['gene_id']

    # From here out, all should be uppercase (gene, tx, region, variant_id)
    query = query.upper()
    gene = get_gene_by_name(db, query)
    if gene:
        return 'gene', gene['gene_id']

    # Ensembl formatted queries
    if query.startswith('ENS'):
        # Gene
        gene = get_gene(db, query)
        if gene:
            return 'gene', gene['gene_id']

        # Transcript
        transcript = get_transcript(db, query)
        if transcript:
            return 'transcript', transcript['transcript_id']

    # From here on out, only region queries
    if query.startswith('CHR'):
        # Slice off exactly the prefix; str.lstrip('CHR') would strip any
        # run of the characters C/H/R, not the literal prefix.
        query = query[len('CHR'):]

    # Region
    m = R1.match(query)
    if m:
        if int(m.group(3)) < int(m.group(2)):
            return 'region', 'invalid'
        return 'region', '{}-{}-{}'.format(m.group(1), m.group(2), m.group(3))
    m = R2.match(query)
    if m:
        return 'region', '{}-{}-{}'.format(m.group(1), m.group(2), m.group(2))
    m = R3.match(query)
    if m:
        return 'region', '{}'.format(m.group(1))
    m = R4.match(query)
    if m:
        return 'variant', '{}-{}-{}-{}'.format(m.group(1), m.group(2), m.group(3), m.group(4))

    return 'not_found', query