def extract_cancer_types(self):
    """
    Collect the cancer types referenced by the clinical nodes of the match tree.

    Walks ``self.g`` depth-first (post-order) starting at node ``1``. For every
    node whose ``type`` is ``clinical`` and whose ``value`` carries an
    ``oncotree_primary_diagnosis``, the diagnosis is looked up in the oncotree
    (with any ``!`` removed) and expanded to the texts of its subtree and of
    its parents. Diagnoses starting with ``!`` feed the exclusion list; all
    others feed the expanded and primary lists.

    :return: dict with the keys ``diagnoses``, ``cancer_types_expanded``,
        ``primary_cancer_types`` and ``excluded_cancer_types``. Each value is a
        de-duplicated list (order unspecified) with ``root`` entries removed.
    """
    diagnoses = []
    cancer_types_expanded = []
    primary_cancer_types = []
    excluded_cancer_types = []

    onco_tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)
    liquid_children_txt, solid_children_txt = expand_liquid_oncotree(onco_tree)

    # iterate through the graph
    for node_id in list(nx.dfs_postorder_nodes(self.g, source=1)):
        # ``Graph.node`` was removed in networkx 2.4; ``Graph.nodes`` is the
        # supported accessor.
        node = self.g.nodes[node_id]
        if node['type'] != 'clinical':
            continue
        if 'oncotree_primary_diagnosis' not in node['value']:
            continue

        diagnosis = node['value']['oncotree_primary_diagnosis']
        n = oncotreenx.lookup_text(onco_tree, diagnosis.replace('!', ''))

        if diagnosis == '_SOLID_':
            children_txt = solid_children_txt
            primary_parent = 'All Solid Tumors'
            parents_txt = ['All Solid Tumors']
        elif diagnosis == '_LIQUID_':
            children_txt = liquid_children_txt
            primary_parent = 'All Liquid Tumors'
            parents_txt = ['All Liquid Tumors']
        else:
            # The subtree is only needed on this branch, so it is computed here
            # rather than for every diagnosis.
            # NOTE(review): when the lookup yields None, nx.dfs_tree(..., None)
            # walks the whole oncotree, so an unknown diagnosis expands to every
            # node text -- confirm this is intended.
            children = list(nx.dfs_tree(onco_tree, n))
            children_txt = [onco_tree.nodes[nn]['text'] for nn in children]

            if n is not None:
                parents, parents_txt, primary_parent = get_parents(onco_tree, n)
            else:
                parents_txt = []
                primary_parent = ''

        diagnoses.append(diagnosis)
        if diagnosis.startswith('!'):
            excluded_cancer_types.append(diagnosis.replace('!', ''))
            excluded_cancer_types.extend(children_txt)
        else:
            primary_tumors = get_primary_tumors()
            cancer_types_expanded.append(parse_diagnosis(diagnosis))
            cancer_types_expanded.extend(children_txt)
            # keep only parents whose first word is not a primary tumor name
            cancer_types_expanded.extend(
                [i for i in parents_txt if i.split()[0] not in primary_tumors])
            primary_cancer_types.append(primary_parent)

    return {
        'diagnoses': list(set(i for i in diagnoses if i.strip() != 'root')),
        'cancer_types_expanded': list(set(i for i in cancer_types_expanded if i.strip() != 'root')),
        'primary_cancer_types': list(set(i for i in primary_cancer_types if i.strip() != 'root')),
        'excluded_cancer_types': list(set(i for i in excluded_cancer_types if i.strip() != 'root'))
    }
def extract_cancer_types(self):
    """
    Gather every cancer type referenced by clinical nodes of the match tree.

    The match tree ``self.g`` is traversed depth-first (post-order) from node
    ``1``. Each clinical node with an ``oncotree_primary_diagnosis`` is
    expanded through the oncotree into subtree texts and parent texts.

    :return: dict of four de-duplicated lists (``diagnoses``,
        ``cancer_types_expanded``, ``primary_cancer_types``,
        ``excluded_cancer_types``) with ``root`` entries dropped
    """
    onco_tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)
    liquid_children_txt, solid_children_txt = expand_liquid_oncotree(onco_tree)

    seen_diagnoses, expanded, primaries, excluded = [], [], [], []

    for node_id in list(nx.dfs_postorder_nodes(self.g, source=1)):
        node = self.g.nodes[node_id]
        # only clinical nodes that name a primary diagnosis are of interest
        if node['type'] != 'clinical' or 'oncotree_primary_diagnosis' not in node['value']:
            continue

        diagnosis = node['value']['oncotree_primary_diagnosis']
        tree_node = oncotreenx.lookup_text(onco_tree, diagnosis.replace('!', ''))
        subtree = list(nx.dfs_tree(onco_tree, tree_node))

        if diagnosis == '_SOLID_':
            children_txt = solid_children_txt
            parents_txt = ['All Solid Tumors']
            primary_parent = 'All Solid Tumors'
        elif diagnosis == '_LIQUID_':
            children_txt = liquid_children_txt
            parents_txt = ['All Liquid Tumors']
            primary_parent = 'All Liquid Tumors'
        else:
            children_txt = [onco_tree.nodes[member]['text'] for member in subtree]
            if tree_node is None:
                parents_txt, primary_parent = [], ''
            else:
                _, parents_txt, primary_parent = get_parents(onco_tree, tree_node)

        seen_diagnoses.append(diagnosis)

        # a leading '!' marks an excluded diagnosis
        if diagnosis.startswith('!'):
            excluded.append(diagnosis.replace('!', ''))
            excluded.extend(children_txt)
            continue

        primary_tumors = get_primary_tumors()
        expanded.append(parse_diagnosis(diagnosis))
        expanded.extend(children_txt)
        kept_parents = [
            parent_txt for parent_txt in parents_txt
            if parent_txt.split()[0] not in primary_tumors
        ]
        expanded.extend(kept_parents)
        primaries.append(primary_parent)

    def _unique_without_root(values):
        # de-duplicate and drop the synthetic 'root' entry
        return list({entry for entry in values if entry.strip() != 'root'})

    return {
        'diagnoses': _unique_without_root(seen_diagnoses),
        'cancer_types_expanded': _unique_without_root(expanded),
        'primary_cancer_types': _unique_without_root(primaries),
        'excluded_cancer_types': _unique_without_root(excluded),
    }
def reset_elasticsearch_mapping():
    """
    Rebuild the ``trial`` mapping payload and PUT it to Elasticsearch.

    Loads the ``trial`` section of the JSON file at ``ES_MAPPING``, derives a
    tumor-type sort order from the oncotree built from ``TUMOR_TREE``, stores
    that order under ``_meta.tumor_type_sort_order`` and sends the payload to
    ``ES_URI + "/_mapping/trial"`` with HTTP basic auth.

    :return: dict with the ``status_code`` and ``text`` of the HTTP response
    """
    logging.info("reset_elasticsearch_mapping")
    with open(ES_MAPPING) as es_mapping_file_handle:
        json_payload = json.load(es_mapping_file_handle)['trial']

    ot = build_oncotree(TUMOR_TREE)

    def node_text(node_id):
        # ``Graph.node`` was removed in networkx 2.4; use ``Graph.nodes``.
        return ot.nodes[node_id]['text']

    order = ["All Solid Tumors", "All Liquid Tumors"]

    # nodes whose shortest path from 'root' has two entries, i.e. root's
    # direct children, sorted by display text
    top_level_ot = sorted(
        [node for node, path in nx.shortest_path(ot, 'root').items() if len(path) == 2],
        key=node_text)

    for top_level in top_level_ot:
        top_text = node_text(top_level)
        order.append(top_text)
        # a combined name such as "A/B" additionally contributes its parts
        if '/' in top_text:
            order = order + top_text.split('/')

        second_level_ot = sorted(
            [item for item in nx.descendants(ot, top_level)], key=node_text)
        for second_level in second_level_ot:
            order.append(node_text(second_level))

            third_level_ot = sorted(
                [item for item in nx.descendants(ot, second_level)], key=node_text)
            for third_level in third_level_ot:
                # NOTE(review): this appends the node id, whereas the levels
                # above append the node text -- confirm which is intended.
                order.append(third_level)

    json_payload["_meta"] = dict(json_payload.setdefault('_meta', dict()),
                                 **{"tumor_type_sort_order": order})

    r = requests.put(ES_URI + "/_mapping/trial",
                     json=json_payload,
                     auth=HTTPBasicAuth(ES_USER, ES_PASSWORD))
    msg = {"status_code": r.status_code, "text": r.text}
    logging.info(msg)
    return msg
def test_build_oncotree(self):
    """The tree built from data/tumor_types.txt holds more than ten nodes."""
    tree = build_oncotree(file_path='data/tumor_types.txt')

    node_total = len(tree.nodes())
    assert node_total > 10
def __init__(self, args):
    """
    Store ``args``, initialise the four ``*_df`` attributes to ``None`` and
    build the oncotree.

    :param args: stored unchanged on ``self.args``
    """
    self.args = args

    # placeholders, all starting out as None
    self.mda_df = self.vicc_df = self.uhn_df = self.grcc_df = None

    tree = oncotreenx.build_oncotree()
    self.oncotree = tree
def test_ncit(self):
    """Spot-check the ``metanci`` attribute of two nodes in both tree files."""
    # loop over tree type.
    for x in ['data/tumor_types.old.txt', 'data/tumor_types.txt']:

        # build the tree.
        g = build_oncotree(file_path=x)

        # spot check several values. The comparisons must be asserted --
        # a bare ``a == b`` expression is evaluated and then discarded.
        # ``Graph.node`` was removed in networkx 2.4; use ``Graph.nodes``.
        assert g.nodes['BLOOD']['metanci'] == 'C12434'
        assert g.nodes['PT']['metanci'] == 'C7575'
def unique_query():
    """
    Return the distinct values of a resource field as a JSON response.

    The resource and field come from ``parse_resource_field()``. When that
    call reports status ``1``, its second value is returned as-is. For the
    clinical ``ONCOTREE_PRIMARY_DIAGNOSIS_NAME`` field the values are
    ``{'text': ..., 'code': ...}`` entries, one per oncotree node. For any
    other field they are the distinct database values with ``None`` removed.

    :return: a ``Response`` with status 200 and a JSON body holding
        ``resource``, ``field`` and ``values``
    """
    # parse parameters
    status, val = parse_resource_field()

    # bad args.
    if status == 1:
        return val

    # good args.
    resource, field = val

    # special case for oncotree.
    if resource == 'clinical' and field == 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME':

        # make oncotree.
        onco_tree = oncotreenx.build_oncotree(settings.DATA_ONCOTREE_FILE)

        # one entry per node; nodes(data=True) avoids the ``Graph.node``
        # accessor that was removed in networkx 2.4.
        results = [
            {'text': attrs['text'], 'code': code}
            for code, attrs in onco_tree.nodes(data=True)
        ]

    else:

        # search for this field.
        db = app.data.driver.db
        results = db[resource].distinct(field)

        # remove None.
        tmp = set(results)
        tmp.discard(None)
        results = list(tmp)

    # encode response.
    data = json.dumps({'resource': resource, 'field': field, 'values': results})
    resp = Response(response=data, status=200, mimetype="application/json")
    return resp
def __init__(self, args):
    """
    Store ``args`` and set up the empty per-panel data frames, the panel
    lookup, the oncotree and the ``i`` counter.

    :param args: stored unchanged on ``self.args``
    """
    self.args = args
    self.sample_df = self.patient_df = None

    # one empty frame per panel, each with the OUT_COLS columns
    frame_341 = pd.DataFrame(columns=OUT_COLS)
    frame_410 = pd.DataFrame(columns=OUT_COLS)
    self.impact341_df = frame_341
    self.impact410_df = frame_410

    # panel name -> the very same frame objects stored above
    self.panel_dict = dict([
        ('MSK-IMPACT341', frame_341),
        ('MSK-IMPACT410', frame_410),
    ])

    self.oncotree = oncotreenx.build_oncotree()
    self.i = 0
def build_oncotree():
    """Build the oncotree from the file at ``TUMOR_TREE`` and return it."""
    tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)
    return tree
def prepare_criteria(item):
    """
    Turn a stored filter into clinical and genomic query dicts plus short
    human-readable descriptions.

    The clinical filter is round-tripped through JSON with the
    ``REREPLACEMENTS`` substitutions applied. Its ``BIRTH_DATE`` and
    ``REPORT_DATE`` strings are parsed into datetimes, and an
    ``ONCOTREE_PRIMARY_DIAGNOSIS_NAME`` is expanded to an ``$in`` over the
    matching oncotree subtree. The genomic filter is rewritten into an
    ``$or`` of per-variant-category clauses when a ``VARIANT_CATEGORY``
    selects any.

    :param item: dict that may contain ``clinical_filter`` and/or
        ``genomic_filter`` dicts
    :return: ``(c, g, (gen_txt, [clin_txt_1, clin_txt_2_age, clin_txt_2_gender]))``
        where ``c`` and ``g`` are the clinical and genomic query dicts.
        ``gen_txt`` is a description string when the genomic filter has a
        ``TRUE_HUGO_SYMBOL``, otherwise the list of collected labels.
    """
    onco_tree = oncotreenx.build_oncotree(settings.DATA_ONCOTREE_FILE)

    c = {}
    clin_txt_1 = ""
    clin_txt_2_gender = ""
    clin_txt_2_age = ""
    if 'clinical_filter' in item:
        clin_tmp = json.dumps(item['clinical_filter'])
        for key, val in REREPLACEMENTS.items():
            clin_tmp = clin_tmp.replace(key, val)

        c = json.loads(clin_tmp)

        if 'GENDER' in item['clinical_filter']:
            clin_txt_2_gender = item['clinical_filter']['GENDER']

        if 'BIRTH_DATE' in item['clinical_filter']:
            # the filter is a single {operator: date-string} pair
            op = next(iter(item['clinical_filter']['BIRTH_DATE'].keys()))
            val = next(iter(item['clinical_filter']['BIRTH_DATE'].values()))

            try:
                val = datetime.datetime.strptime(val.replace(" GMT", ""), '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                val = dateutil.parser.parse(val)

            # compute the age.
            today = datetime.date.today()
            tmp = today.year - val.year - ((today.month, today.day) < (val.month, val.day - 1))
            val = tmp

            # a "born on or after" bound reads as "younger than"
            if op.count("gte") > 0:
                clin_txt_2_age = "< %s" % val
            else:
                clin_txt_2_age = "> %s" % val

        # parse date-times.
        for key in ['BIRTH_DATE', 'REPORT_DATE']:

            if key not in c:
                continue

            # extract the expression value.
            lkey, lval = next(iter(c[key].keys())), next(iter(c[key].values()))
            try:
                c[key][lkey] = datetime.datetime.strptime(
                    lval.replace(" GMT", ""), '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                c[key][lkey] = dateutil.parser.parse(lval)

        # expand oncotree
        if 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME' in item['clinical_filter']:

            txt = item['clinical_filter']['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']

            # stays None when the diagnosis text is not found in the tree
            nodes = None

            if txt == "_LIQUID_" or txt == "_SOLID_":

                # liquid = everything under "Lymph" or "Blood"
                node1 = oncotreenx.lookup_text(onco_tree, "Lymph")
                node2 = oncotreenx.lookup_text(onco_tree, "Blood")

                nodes1 = list(nx.dfs_tree(onco_tree, node1))
                nodes2 = list(nx.dfs_tree(onco_tree, node2))
                nodes = list(set(nodes1).union(set(nodes2)))

                if txt == "_SOLID_":
                    # solid = every node that is not liquid
                    all_nodes = set(list(onco_tree.nodes()))
                    tmp_nodes = all_nodes - set(nodes)
                    nodes = list(tmp_nodes)

                clin_txt_1 = "%s cancers" % txt.replace("_", "").title()

            else:

                clin_txt_1 = "%s" % txt
                node = oncotreenx.lookup_text(onco_tree, txt)
                if onco_tree.has_node(node):
                    nodes = list(nx.dfs_tree(onco_tree, node))

            # Previously an unknown diagnosis left ``nodes`` unbound and the
            # expansion raised UnboundLocalError. Now the filter value taken
            # from the JSON round-trip is kept untouched in that case.
            if nodes is not None:
                # ``Graph.node`` was removed in networkx 2.4; use ``Graph.nodes``.
                nodes_txt = [onco_tree.nodes[n]['text'] for n in nodes]
                c['ONCOTREE_PRIMARY_DIAGNOSIS_NAME'] = {'$in': nodes_txt}

    g = {}
    gen_txt = []
    if 'genomic_filter' in item:

        gen_tmp = json.dumps(item['genomic_filter'])
        for key, val in REREPLACEMENTS.items():
            gen_tmp = gen_tmp.replace(key, val)

        g = json.loads(gen_tmp)

        # add TRUE_HUGO_SYMBOL value mutational signature filter queries
        if 'TRUE_HUGO_SYMBOL' in g and g['TRUE_HUGO_SYMBOL'] == {'$in': ['']}:
            g['TRUE_HUGO_SYMBOL'] = None

        sv_test = False
        mut_test = False
        cnv_test = False
        if 'VARIANT_CATEGORY' in item['genomic_filter']:
            variant_category = item['genomic_filter']['VARIANT_CATEGORY']
            if isinstance(variant_category, dict):
                for x in variant_category.values():
                    if "SV" in set(x):
                        sv_test = True
                    if "CNV" in set(x):
                        cnv_test = True
                    if "MUTATION" in set(x):
                        mut_test = True
            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'SV':
                sv_test = True
            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'CNV':
                cnv_test = True
            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'MUTATION':
                mut_test = True

        # build text.
        exon_txt = ""
        protein_txt = ""
        if mut_test:
            gen_txt.append("Mutation")

            if 'TRUE_EXON_CHANGE' in item['genomic_filter']:
                exon_txt = item['genomic_filter']['TRUE_EXON_CHANGE']

            if 'TRUE_PROTEIN_CHANGE' in item['genomic_filter']:
                protein_txt = item['genomic_filter']['TRUE_PROTEIN_CHANGE']

        if cnv_test:
            if 'CNV_CALL' in g:
                if isinstance(g['CNV_CALL'], dict):
                    gen_txt += next(iter(g['CNV_CALL'].values()))
                else:
                    gen_txt.append(g['CNV_CALL'])

        if sv_test:
            gen_txt.append("Structural rearrangement")

        if 'MMR_STATUS' in item['genomic_filter']:
            gen_txt.append(item['genomic_filter']['MMR_STATUS'])

        if 'TABACCO_STATUS' in item['genomic_filter']:
            gen_txt.append('Tobacco Mutational Signature')

        if 'TEMOZOLOMIDE_STATUS' in item['genomic_filter']:
            gen_txt.append('Temozolomide Mutational Signature')

        if 'POLE_STATUS' in item['genomic_filter']:
            gen_txt.append('PolE Mutational Signature')

        if 'APOBEC_STATUS' in item['genomic_filter']:
            gen_txt.append('APOBEC Mutational Signature')

        if 'UVA_STATUS' in item['genomic_filter']:
            gen_txt.append('UVA Mutational Signature')

        # one clause per selected variant category; they are OR-ed below
        clauses = []
        if mut_test:
            clause = {
                'VARIANT_CATEGORY': 'MUTATION',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL']
            }
            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']
            if 'TRUE_PROTEIN_CHANGE' in g:
                clause['TRUE_PROTEIN_CHANGE'] = g['TRUE_PROTEIN_CHANGE']
            clauses.append(clause)

        if cnv_test:
            clause = {
                'VARIANT_CATEGORY': 'CNV',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL'],
            }
            if 'CNV_CALL' in g:
                clause['CNV_CALL'] = g['CNV_CALL']
            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']
            clauses.append(clause)

        if sv_test:
            true_hugo = item['genomic_filter']['TRUE_HUGO_SYMBOL']
            if isinstance(true_hugo, dict):
                genes = next(iter(true_hugo.values()))
            else:
                genes = [true_hugo]

            # also search under every known synonym of the requested genes
            to_add = list()
            for gene in genes:
                if gene in synonyms:
                    to_add += synonyms[gene]
            genes = genes + to_add

            # match the gene as a whole word anywhere in the comment
            # NOTE(review): gene names are interpolated into the pattern
            # unescaped -- confirm they never contain regex metacharacters.
            abc = '|'.join([
                rf"(.*\W{gene}\W.*)|(^{gene}\W.*)|(.*\W{gene}$)" for gene in genes
            ])
            clauses.append({'STRUCTURAL_VARIANT_COMMENT': {"$regex": abc}})
            clauses.append({'LEFT_PARTNER_GENE': {'$in': genes}})
            clauses.append({'RIGHT_PARTNER_GENE': {'$in': genes}})

        if len(clauses) > 0:
            g = {"$or": clauses}

        # keys already folded into the clauses above are not copied again
        special_clauses = {
            'STRUCTURAL_VARIANT_COMMENT', 'VARIANT_CATEGORY',
            'TRUE_HUGO_SYMBOL', 'CNV_CALL', 'WILDTYPE', 'TRUE_PROTEIN_CHANGE'
        }
        for key in item['genomic_filter']:
            if key in special_clauses:
                continue
            g[key] = item['genomic_filter'][key]

        get_recursively(g, "GMT")

        if 'TRUE_HUGO_SYMBOL' in item['genomic_filter']:
            if isinstance(item['genomic_filter']['TRUE_HUGO_SYMBOL'], dict):
                genes = next(
                    iter(item['genomic_filter']['TRUE_HUGO_SYMBOL'].values()))
            else:
                genes = [item['genomic_filter']['TRUE_HUGO_SYMBOL']]

            genes = [str(i) for i in genes]
            genes = ', '.join(genes)

            if len(gen_txt) > 1:
                gen_txt = "%s: %s" % (genes, ', '.join(gen_txt))
            else:
                if exon_txt == "" and protein_txt == "":
                    gen_txt = "%s %s" % (genes, ', '.join(gen_txt))
                elif exon_txt != "":
                    gen_txt = "%s exon %s" % (genes, exon_txt)
                else:
                    gen_txt = "%s %s" % (genes, protein_txt)

    return c, g, (gen_txt, [clin_txt_1, clin_txt_2_age, clin_txt_2_gender])
def prepare_criteria(item):
    """
    Turn a stored filter into clinical and genomic query dicts plus short
    human-readable descriptions.

    The clinical filter is round-tripped through JSON with the
    ``REREPLACEMENTS`` substitutions applied. Its ``BIRTH_DATE`` and
    ``REPORT_DATE`` strings are parsed into datetimes, and an
    ``ONCOTREE_PRIMARY_DIAGNOSIS_NAME`` is expanded to an ``$in`` over the
    matching oncotree subtree. The genomic filter is rewritten into an
    ``$or`` of per-variant-category clauses when a ``VARIANT_CATEGORY``
    selects any.

    Single-entry operator dicts are read with ``next(iter(...))``, which
    works on both Python 2 and Python 3 (``dict.keys()[0]`` does not work
    on Python 3).

    :param item: dict that may contain ``clinical_filter`` and/or
        ``genomic_filter`` dicts
    :return: ``(c, g, (gen_txt, [clin_txt_1, clin_txt_2_age, clin_txt_2_gender]))``
        where ``c`` and ``g`` are the clinical and genomic query dicts.
        ``gen_txt`` is a description string when the genomic filter has a
        ``TRUE_HUGO_SYMBOL``, otherwise the list of collected labels.
    """
    onco_tree = oncotreenx.build_oncotree(settings.DATA_ONCOTREE_FILE)

    c = {}
    clin_txt_1 = ""
    clin_txt_2_gender = ""
    clin_txt_2_age = ""
    if 'clinical_filter' in item:
        clin_tmp = json.dumps(item['clinical_filter'])
        for key, val in REREPLACEMENTS.items():
            clin_tmp = clin_tmp.replace(key, val)

        c = json.loads(clin_tmp)

        if 'GENDER' in item['clinical_filter']:
            clin_txt_2_gender = item['clinical_filter']['GENDER']

        if 'BIRTH_DATE' in item['clinical_filter']:
            # the filter is a single {operator: date-string} pair
            birth_filter = item['clinical_filter']['BIRTH_DATE']
            op = next(iter(birth_filter.keys()))
            val = next(iter(birth_filter.values()))

            try:
                val = datetime.datetime.strptime(val.replace(" GMT", ""), '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                val = dateutil.parser.parse(val)

            # compute the age.
            today = datetime.date.today()
            tmp = today.year - val.year - ((today.month, today.day) < (val.month, val.day - 1))
            val = tmp

            # a "born on or after" bound reads as "younger than"
            if op.count("gte") > 0:
                clin_txt_2_age = "< %s" % val
            else:
                clin_txt_2_age = "> %s" % val

        # parse date-times.
        for key in ['BIRTH_DATE', 'REPORT_DATE']:

            if key not in c:
                continue

            # extract the expression value.
            lkey, lval = next(iter(c[key].keys())), next(iter(c[key].values()))
            try:
                c[key][lkey] = datetime.datetime.strptime(lval.replace(" GMT", ""), '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                c[key][lkey] = dateutil.parser.parse(lval)

        # expand oncotree
        if 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME' in item['clinical_filter']:

            txt = item['clinical_filter']['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']

            # stays None when the diagnosis text is not found in the tree
            nodes = None

            if txt == "_LIQUID_" or txt == "_SOLID_":

                # liquid = everything under "Lymph" or "Blood"
                node1 = oncotreenx.lookup_text(onco_tree, "Lymph")
                node2 = oncotreenx.lookup_text(onco_tree, "Blood")

                nodes1 = list(nx.dfs_tree(onco_tree, node1))
                nodes2 = list(nx.dfs_tree(onco_tree, node2))
                nodes = list(set(nodes1).union(set(nodes2)))

                if txt == "_SOLID_":
                    # solid = every node that is not liquid
                    all_nodes = set(list(onco_tree.nodes()))
                    tmp_nodes = all_nodes - set(nodes)
                    nodes = list(tmp_nodes)

                clin_txt_1 = "%s cancers" % txt.replace("_", "").title()

            else:

                clin_txt_1 = "%s" % txt
                node = oncotreenx.lookup_text(onco_tree, txt)
                if onco_tree.has_node(node):
                    nodes = list(nx.dfs_tree(onco_tree, node))

            # Previously an unknown diagnosis left ``nodes`` unbound and the
            # expansion raised UnboundLocalError. Now the filter value taken
            # from the JSON round-trip is kept untouched in that case.
            if nodes is not None:
                # ``Graph.node`` was removed in networkx 2.4; use ``Graph.nodes``.
                nodes_txt = [onco_tree.nodes[n]['text'] for n in nodes]
                c['ONCOTREE_PRIMARY_DIAGNOSIS_NAME'] = {'$in': nodes_txt}

    g = {}
    gen_txt = []
    if 'genomic_filter' in item:

        gen_tmp = json.dumps(item['genomic_filter'])
        for key, val in REREPLACEMENTS.items():
            gen_tmp = gen_tmp.replace(key, val)

        g = json.loads(gen_tmp)

        # add TRUE_HUGO_SYMBOL value mutational signature filter queries
        if 'TRUE_HUGO_SYMBOL' in g and g['TRUE_HUGO_SYMBOL'] == {'$in': ['']}:
            g['TRUE_HUGO_SYMBOL'] = None

        sv_test = False
        mut_test = False
        cnv_test = False
        if 'VARIANT_CATEGORY' in item['genomic_filter']:
            variant_category = item['genomic_filter']['VARIANT_CATEGORY']
            if isinstance(variant_category, dict):
                for x in variant_category.values():
                    if "SV" in set(x):
                        sv_test = True
                    if "CNV" in set(x):
                        cnv_test = True
                    if "MUTATION" in set(x):
                        mut_test = True
            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'SV':
                sv_test = True
            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'CNV':
                cnv_test = True
            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'MUTATION':
                mut_test = True

        # build text.
        exon_txt = ""
        protein_txt = ""
        if mut_test:
            gen_txt.append("Mutation")

            if 'TRUE_EXON_CHANGE' in item['genomic_filter']:
                exon_txt = item['genomic_filter']['TRUE_EXON_CHANGE']

            if 'TRUE_PROTEIN_CHANGE' in item['genomic_filter']:
                protein_txt = item['genomic_filter']['TRUE_PROTEIN_CHANGE']

        if cnv_test:
            if 'CNV_CALL' in g:
                if isinstance(g['CNV_CALL'], dict):
                    gen_txt += next(iter(g['CNV_CALL'].values()))
                else:
                    gen_txt.append(g['CNV_CALL'])

        if sv_test:
            gen_txt.append("Structural rearrangement")

        if 'MMR_STATUS' in item['genomic_filter']:
            gen_txt.append(item['genomic_filter']['MMR_STATUS'])

        if 'TABACCO_STATUS' in item['genomic_filter']:
            gen_txt.append('Tobacco Mutational Signature')

        if 'TEMOZOLOMIDE_STATUS' in item['genomic_filter']:
            gen_txt.append('Temozolomide Mutational Signature')

        if 'POLE_STATUS' in item['genomic_filter']:
            gen_txt.append('PolE Mutational Signature')

        if 'APOBEC_STATUS' in item['genomic_filter']:
            gen_txt.append('APOBEC Mutational Signature')

        if 'UVA_STATUS' in item['genomic_filter']:
            gen_txt.append('UVA Mutational Signature')

        # one clause per selected variant category; they are OR-ed below
        clauses = []
        if mut_test:
            clause = {
                'VARIANT_CATEGORY': 'MUTATION',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL']
            }
            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']
            if 'TRUE_PROTEIN_CHANGE' in g:
                clause['TRUE_PROTEIN_CHANGE'] = g['TRUE_PROTEIN_CHANGE']
            clauses.append(clause)

        if cnv_test:
            clause = {
                'VARIANT_CATEGORY': 'CNV',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL'],
            }
            if 'CNV_CALL' in g:
                clause['CNV_CALL'] = g['CNV_CALL']
            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']
            clauses.append(clause)

        if sv_test:
            true_hugo = item['genomic_filter']['TRUE_HUGO_SYMBOL']
            if isinstance(true_hugo, dict):
                genes = next(iter(true_hugo.values()))
            else:
                genes = [true_hugo]

            # also search under every known synonym of the requested genes
            to_add = list()
            for gene in genes:
                if gene in synonyms:
                    to_add += synonyms[gene]
            genes = genes + to_add

            # one case-insensitive whole-word pattern per gene; the raw string
            # keeps ``\W`` from being read as a string escape
            # NOTE(review): gene names are interpolated into the pattern
            # unescaped -- confirm they never contain regex metacharacters.
            sv_clauses = []
            for gene in genes:
                abc = r"(.*\W%s\W.*)|(^%s\W.*)|(.*\W%s$)" % (gene, gene, gene)
                sv_clauses.append(re.compile(abc, re.IGNORECASE))

            clause = {
                'STRUCTURAL_VARIANT_COMMENT': {"$in": sv_clauses}
            }
            clauses.append(clause)

        if len(clauses) > 0:
            g = {
                "$or": clauses
            }

        # keys already folded into the clauses above are not copied again
        special_clauses = {
            'STRUCTURAL_VARIANT_COMMENT', 'VARIANT_CATEGORY',
            'TRUE_HUGO_SYMBOL', 'CNV_CALL', 'WILDTYPE', 'TRUE_PROTEIN_CHANGE'
        }
        for key in item['genomic_filter']:
            if key in special_clauses:
                continue
            g[key] = item['genomic_filter'][key]

        get_recursively(g, "GMT")

        if 'TRUE_HUGO_SYMBOL' in item['genomic_filter']:
            if isinstance(item['genomic_filter']['TRUE_HUGO_SYMBOL'], dict):
                genes = next(iter(item['genomic_filter']['TRUE_HUGO_SYMBOL'].values()))
            else:
                genes = [item['genomic_filter']['TRUE_HUGO_SYMBOL']]

            genes = [str(i) for i in genes]
            genes = ', '.join(genes)

            if len(gen_txt) > 1:
                gen_txt = "%s: %s" % (genes, ', '.join(gen_txt))
            else:
                if exon_txt == "" and protein_txt == "":
                    gen_txt = "%s %s" % (genes, ', '.join(gen_txt))
                elif exon_txt != "":
                    gen_txt = "%s exon %s" % (genes, exon_txt)
                else:
                    gen_txt = "%s %s" % (genes, protein_txt)

    return c, g, (gen_txt, [clin_txt_1, clin_txt_2_age, clin_txt_2_gender])
def autocomplete_query():
    """
    Return autocomplete suggestions for a resource field as a JSON response.

    Reads ``value`` (the text typed so far) and the optional ``gene`` from the
    request arguments. The resource and field come from
    ``parse_resource_field()``. When that call reports status ``1``, its
    second value is returned as-is.

    * Clinical ``ONCOTREE_PRIMARY_DIAGNOSIS_NAME``: case-insensitive substring
      match on the oncotree node texts. Each hit also contributes its direct
      predecessors and successors. ``root`` is never returned.
    * ``string`` fields: regex search for ``value`` as literal text.
    * ``integer`` fields: prefix match via ``$where``. A ``value`` that is not
      an optionally signed run of digits yields an empty list, because it can
      never prefix an integer and must not reach the JavaScript expression.

    :return: a ``Response`` with status 200 and a JSON body holding
        ``resource``, ``field`` and ``values``. Status 400 with an ``error``
        message when ``value`` is missing or the field type is unsupported.
    """
    # local import: only needed to sanitise the user-supplied search text
    import re

    # parse parameters
    status, val = parse_resource_field()

    # parse the value.
    value = request.args.get("value")
    gene = request.args.get("gene")

    # bad args.
    if status == 1:
        return val

    # good args.
    resource, field = val

    # get the type. The lookup also guarantees ``field`` is a known schema
    # key (an unknown one raises KeyError here).
    if resource == "genomic":
        schema = data_model.genomic_schema[field]['type']
    else:
        schema = data_model.clinical_schema[field]['type']

    # without a search text there is nothing to complete
    if value is None:
        data = json.dumps({'error': 'missing required parameter: value'})
        return Response(response=data, status=400, mimetype="application/json")

    # special cases.
    if resource == 'clinical' and field == 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME':

        # make oncotree.
        onco_tree = oncotreenx.build_oncotree(settings.DATA_ONCOTREE_FILE)

        # node id -> display text; nodes(data=True) avoids the ``Graph.node``
        # accessor that was removed in networkx 2.4.
        node_text = {n: attrs['text'] for n, attrs in onco_tree.nodes(data=True)}

        # loop over every node and do text match.
        needle = value.lower()
        hit_set = set()
        for n, text in node_text.items():
            if needle in text.lower():
                # keep the hit plus its direct predecessors and successors
                hit_set.add(n)
                hit_set.update(onco_tree.predecessors(n))
                hit_set.update(onco_tree.successors(n))

        # remove root.
        hit_set.discard('root')

        # convert to full text.
        results = [node_text[n] for n in hit_set]

    else:

        # only support string and integer.
        if schema not in ('string', 'integer'):
            data = json.dumps({'error': 'unsupported field type: %s' % schema})
            resp = Response(response=data, status=400, mimetype="application/json")
            return resp

        db = app.data.driver.db
        gene_clause = {"$or": [
            {'TRUE_HUGO_SYMBOL': gene},
            {'CNV_HUGO_SYMBOL': gene},
        ]}

        query = None
        if schema == "string":

            # escape the user text so regex metacharacters match literally
            term = '.*%s.*' % re.escape(value)

            # NOTE(review): '-i' is kept from the original; MongoDB documents
            # 'i' as the case-insensitive option -- confirm '-i' is intended.
            field_clause = {field: {'$regex': term, '$options': '-i'}}
            if gene is None:
                query = db[resource].find(field_clause)
            else:
                query = db[resource].find({"$and": [field_clause, gene_clause]})

        elif re.match(r'-?[0-9]*\Z', value):

            # ``value`` is now known to be digits only, so it is safe to
            # place inside the server-side JavaScript expression.
            term = "/^%s.*/.test(this.%s)" % (value, field)

            if gene is None:
                query = db[resource].find({"$and": [
                    {'$where': term},
                ]})
            else:
                query = db[resource].find({"$and": [
                    {'$where': term},
                    gene_clause,
                ]})

        # extract distinct from query (nothing can match a non-numeric prefix)
        results = query.distinct(field) if query is not None else []

        # remove None.
        tmp = set(results)
        tmp.discard(None)
        results = list(tmp)

    # encode response.
    data = json.dumps({'resource': resource, 'field': field, 'values': results})
    resp = Response(response=data, status=200, mimetype="application/json")
    return resp
def test_expand_liquid_oncotree(self):
    """'Leukemia' is in the first list from expand_liquid_oncotree, not the second."""
    tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)

    liquid_txt, solid_txt = expand_liquid_oncotree(tree)

    assert 'Leukemia' in liquid_txt
    assert not ('Leukemia' in solid_txt)
def setup(self):
    """Build the oncotree from data/tumor_types.txt and keep it on ``self.g``."""
    tumor_file = "data/tumor_types.txt"
    self.g = build_oncotree(file_path=tumor_file)