Ejemplo n.º 1
0
    def extract_cancer_types(self):
        """
        Collect the cancer types referenced by the clinical nodes of the match tree.

        Walks ``self.g`` depth-first (post-order) starting at node ``1``. For every
        node of type ``clinical`` whose value holds an ``oncotree_primary_diagnosis``,
        the diagnosis is expanded through the oncotree built from ``TUMOR_TREE``:

        * ``_SOLID_`` / ``_LIQUID_`` expand to the lists returned by
          ``expand_liquid_oncotree``.
        * any other diagnosis is looked up by text (a leading ``!`` is ignored for
          the lookup) and expanded to the node itself plus everything below it.
        * a diagnosis that is not found in the oncotree contributes no children
          and no parents.

        A diagnosis starting with ``!`` feeds ``excluded_cancer_types``; every other
        diagnosis feeds ``cancer_types_expanded`` and ``primary_cancer_types``.

        :return: dict with the keys ``diagnoses``, ``cancer_types_expanded``,
                 ``primary_cancer_types`` and ``excluded_cancer_types``; each value
                 is a de-duplicated list with the ``root`` entry removed
        """

        diagnoses = []
        cancer_types_expanded = []
        primary_cancer_types = []
        excluded_cancer_types = []
        onco_tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)
        liquid_children_txt, solid_children_txt = expand_liquid_oncotree(onco_tree)

        def unique_without_root(values):
            # de-duplicate and drop the artificial oncotree root entry
            return list(set(i for i in values if i.strip() != 'root'))

        # iterate through the graph
        for node_id in nx.dfs_postorder_nodes(self.g, source=1):
            node = self.g.node[node_id]
            if node['type'] != 'clinical':
                continue
            if 'oncotree_primary_diagnosis' not in node['value']:
                continue

            diagnosis = node['value']['oncotree_primary_diagnosis']

            if diagnosis == '_SOLID_':
                children_txt = solid_children_txt
                primary_parent = 'All Solid Tumors'
                parents_txt = ['All Solid Tumors']
            elif diagnosis == '_LIQUID_':
                children_txt = liquid_children_txt
                primary_parent = 'All Liquid Tumors'
                parents_txt = ['All Liquid Tumors']
            else:
                n = oncotreenx.lookup_text(onco_tree, diagnosis.replace('!', ''))

                if n is not None:
                    children = nx.dfs_tree(onco_tree, n)
                    children_txt = [onco_tree.node[nn]['text'] for nn in children]
                    parents, parents_txt, primary_parent = get_parents(onco_tree, n)
                else:
                    # dfs_tree() with a None source spans the WHOLE oncotree, which
                    # would turn an unknown diagnosis into "every cancer type".
                    # An unknown diagnosis therefore expands to nothing.
                    children_txt = []
                    parents_txt = []
                    primary_parent = ''

            diagnoses.append(diagnosis)
            if diagnosis.startswith('!'):
                excluded_cancer_types.append(diagnosis.replace('!', ''))
                excluded_cancer_types.extend(children_txt)
            else:
                primary_tumors = get_primary_tumors()
                cancer_types_expanded.append(parse_diagnosis(diagnosis))
                cancer_types_expanded.extend(children_txt)
                # keep only parents whose first word is not a primary tumor name
                cancer_types_expanded.extend([i for i in parents_txt if i.split()[0] not in primary_tumors])
                primary_cancer_types.append(primary_parent)

        return {
            'diagnoses': unique_without_root(diagnoses),
            'cancer_types_expanded': unique_without_root(cancer_types_expanded),
            'primary_cancer_types': unique_without_root(primary_cancer_types),
            'excluded_cancer_types': unique_without_root(excluded_cancer_types)
        }
Ejemplo n.º 2
0
    def extract_cancer_types(self):
        """
        Collect the cancer types referenced by the clinical nodes of the match tree.

        Walks ``self.g`` depth-first (post-order) starting at node ``1``. For every
        node of type ``clinical`` whose value holds an ``oncotree_primary_diagnosis``,
        the diagnosis is expanded through the oncotree built from ``TUMOR_TREE``:

        * ``_SOLID_`` / ``_LIQUID_`` expand to the lists returned by
          ``expand_liquid_oncotree``.
        * any other diagnosis is looked up by text (a leading ``!`` is ignored for
          the lookup) and expanded to the node itself plus everything below it.
        * a diagnosis that is not found in the oncotree contributes no children
          and no parents.

        A diagnosis starting with ``!`` feeds ``excluded_cancer_types``; every other
        diagnosis feeds ``cancer_types_expanded`` and ``primary_cancer_types``.

        :return: dict with the keys ``diagnoses``, ``cancer_types_expanded``,
                 ``primary_cancer_types`` and ``excluded_cancer_types``; each value
                 is a de-duplicated list with the ``root`` entry removed
        """

        diagnoses = []
        cancer_types_expanded = []
        primary_cancer_types = []
        excluded_cancer_types = []
        onco_tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)
        liquid_children_txt, solid_children_txt = expand_liquid_oncotree(onco_tree)

        def unique_without_root(values):
            # de-duplicate and drop the artificial oncotree root entry
            return list(set(i for i in values if i.strip() != 'root'))

        # iterate through the graph
        for node_id in nx.dfs_postorder_nodes(self.g, source=1):
            node = self.g.nodes[node_id]
            if node['type'] != 'clinical':
                continue
            if 'oncotree_primary_diagnosis' not in node['value']:
                continue

            diagnosis = node['value']['oncotree_primary_diagnosis']

            if diagnosis == '_SOLID_':
                children_txt = solid_children_txt
                primary_parent = 'All Solid Tumors'
                parents_txt = ['All Solid Tumors']
            elif diagnosis == '_LIQUID_':
                children_txt = liquid_children_txt
                primary_parent = 'All Liquid Tumors'
                parents_txt = ['All Liquid Tumors']
            else:
                n = oncotreenx.lookup_text(onco_tree, diagnosis.replace('!', ''))

                if n is not None:
                    children = nx.dfs_tree(onco_tree, n)
                    children_txt = [onco_tree.nodes[nn]['text'] for nn in children]
                    parents, parents_txt, primary_parent = get_parents(onco_tree, n)
                else:
                    # dfs_tree() with a None source spans the WHOLE oncotree, which
                    # would turn an unknown diagnosis into "every cancer type".
                    # An unknown diagnosis therefore expands to nothing.
                    children_txt = []
                    parents_txt = []
                    primary_parent = ''

            diagnoses.append(diagnosis)
            if diagnosis.startswith('!'):
                excluded_cancer_types.append(diagnosis.replace('!', ''))
                excluded_cancer_types.extend(children_txt)
            else:
                primary_tumors = get_primary_tumors()
                cancer_types_expanded.append(parse_diagnosis(diagnosis))
                cancer_types_expanded.extend(children_txt)
                # keep only parents whose first word is not a primary tumor name
                cancer_types_expanded.extend([i for i in parents_txt if i.split()[0] not in primary_tumors])
                primary_cancer_types.append(primary_parent)

        return {
            'diagnoses': unique_without_root(diagnoses),
            'cancer_types_expanded': unique_without_root(cancer_types_expanded),
            'primary_cancer_types': unique_without_root(primary_cancer_types),
            'excluded_cancer_types': unique_without_root(excluded_cancer_types)
        }
Ejemplo n.º 3
0
def reset_elasticsearch_mapping():
    """
    Re-upload the ``trial`` mapping to Elasticsearch with a tumor type sort order.

    Loads the ``trial`` entry of the JSON file at ``ES_MAPPING``, computes a
    ``tumor_type_sort_order`` list from the oncotree at ``TUMOR_TREE``, stores it
    under the mapping's ``_meta`` key and PUTs the result to
    ``ES_URI + "/_mapping/trial"`` using basic auth.

    :return: dict with the ``status_code`` and ``text`` of the HTTP response
    """
    logging.info("reset_elasticsearch_mapping")
    with open(ES_MAPPING) as es_mapping_file_handle:
        json_payload = json.load(es_mapping_file_handle)['trial']

    ot = build_oncotree(TUMOR_TREE)

    def node_text(code):
        return ot.node[code]['text']

    def descendants_by_text(code):
        # every descendant of ``code`` (not only direct children), sorted by text
        return sorted(nx.descendants(ot, code), key=node_text)

    # nodes whose shortest path from the root has two entries: root + the node
    paths_from_root = nx.shortest_path(ot, 'root')
    top_level_ot = sorted(
        (code for code, path in paths_from_root.items() if len(path) == 2),
        key=node_text)

    order = ["All Solid Tumors", "All Liquid Tumors"]
    for top_code in top_level_ot:
        top_text = node_text(top_code)
        order.append(top_text)
        if '/' in top_text:
            # names such as "A/B" are additionally listed as their separate parts
            order.extend(top_text.split('/'))

        for second_code in descendants_by_text(top_code):
            order.append(node_text(second_code))
            # NOTE(review): this level is appended as node ids, while the levels
            # above are appended as text -- confirm that this is intended.
            order.extend(descendants_by_text(second_code))

    # merge the sort order into any ``_meta`` section the mapping already has
    merged_meta = dict(json_payload.get('_meta', {}))
    merged_meta["tumor_type_sort_order"] = order
    json_payload["_meta"] = merged_meta

    credentials = HTTPBasicAuth(ES_USER, ES_PASSWORD)
    r = requests.put(ES_URI + "/_mapping/trial",
                     json=json_payload,
                     auth=credentials)
    msg = {"status_code": r.status_code, "text": r.text}
    logging.info(msg)
    return msg
Ejemplo n.º 4
0
    def test_build_oncotree(self):
        """Smoke test: the tree built from ``data/tumor_types.txt`` has more than 10 nodes."""
        tree = build_oncotree(file_path='data/tumor_types.txt')

        node_total = len(tree.nodes())
        assert node_total > 10
Ejemplo n.º 5
0
    def __init__(self, args):
        """
        Store ``args``, start with empty data frame slots and build the oncotree.

        :param args: stored unchanged on ``self.args``
        """
        self.args = args

        # the four data frame slots all start out as None
        self.mda_df = self.vicc_df = self.uhn_df = self.grcc_df = None

        # oncotree built with the library's default arguments
        self.oncotree = oncotreenx.build_oncotree()
Ejemplo n.º 6
0
    def test_ncit(self):
        """Spot check the ``metanci`` attribute of two nodes for both tumor type files."""

        # loop over tree type.
        for x in ['data/tumor_types.old.txt', 'data/tumor_types.txt']:

            # build the tree.
            g = build_oncotree(file_path=x)

            # spot check several values; without ``assert`` these comparisons
            # were evaluated and thrown away, so the test could never fail.
            assert g.node['BLOOD']['metanci'] == 'C12434'
            assert g.node['PT']['metanci'] == 'C7575'
Ejemplo n.º 7
0
def unique_query():
    """
    Return the distinct values of one field of a resource as a JSON response.

    The resource and field come from ``parse_resource_field()``; when that call
    reports status ``1`` its value is returned as-is. For the clinical field
    ``ONCOTREE_PRIMARY_DIAGNOSIS_NAME`` the values are ``{'text', 'code'}`` dicts
    for every oncotree node (the root node is not filtered out). For any other
    field they are the distinct database values, with ``None`` removed.

    :return: the value from ``parse_resource_field()`` for bad arguments,
             otherwise a 200 ``Response`` with ``resource``, ``field`` and ``values``
    """
    status, val = parse_resource_field()

    # bad args: hand back whatever the parser produced.
    if status == 1:
        return val

    resource, field = val

    wants_oncotree = (resource == 'clinical'
                      and field == 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME')
    if wants_oncotree:
        # special case: enumerate the oncotree instead of querying the database.
        onco_tree = oncotreenx.build_oncotree(settings.DATA_ONCOTREE_FILE)
        results = [
            {'text': onco_tree.node[code]['text'], 'code': code}
            for code in onco_tree.nodes()
        ]
    else:
        results = app.data.driver.db[resource].distinct(field)

        # the list is only rebuilt (from a set) when a None had to be removed.
        without_none = set(results)
        if None in without_none:
            without_none.remove(None)
            results = list(without_none)

    # encode response.
    payload = json.dumps({'resource': resource, 'field': field, 'values': results})
    return Response(response=payload,
                    status=200,
                    mimetype="application/json")
Ejemplo n.º 8
0
    def __init__(self, args):
        """
        Store ``args``, create one empty frame per panel and build the oncotree.

        :param args: stored unchanged on ``self.args``
        """
        self.args = args
        self.sample_df = self.patient_df = None

        # one empty frame per panel, both with the OUT_COLS columns
        frame_341 = pd.DataFrame(columns=OUT_COLS)
        frame_410 = pd.DataFrame(columns=OUT_COLS)
        self.impact341_df = frame_341
        self.impact410_df = frame_410

        # panel name -> the very same frame objects held by the attributes above
        self.panel_dict = {
            'MSK-IMPACT341': frame_341,
            'MSK-IMPACT410': frame_410,
        }

        self.oncotree = oncotreenx.build_oncotree()
        self.i = 0
Ejemplo n.º 9
0
def build_oncotree():
    """Build and return the oncotree from the file at ``TUMOR_TREE``."""
    tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)
    return tree
Ejemplo n.º 10
0
def prepare_criteria(item):
    """
    Translate a stored filter item into clinical/genomic queries and display text.

    :param item: mapping that may contain a ``clinical_filter`` and/or a
                 ``genomic_filter`` entry
    :return: tuple ``(c, g, (gen_txt, [clin_txt_1, clin_txt_2_age, clin_txt_2_gender]))``

             * ``c`` -- clinical query dict (``{}`` without a clinical filter)
             * ``g`` -- genomic query dict (``{}`` without a genomic filter)
             * ``gen_txt`` -- genomic description: a string when the genomic
               filter has a ``TRUE_HUGO_SYMBOL``, otherwise a list of labels
             * ``clin_txt_1`` -- diagnosis description
             * ``clin_txt_2_age`` / ``clin_txt_2_gender`` -- age and gender text
    """

    onco_tree = oncotreenx.build_oncotree(settings.DATA_ONCOTREE_FILE)

    c = {}
    clin_txt_1 = ""
    clin_txt_2_gender = ""
    clin_txt_2_age = ""
    if 'clinical_filter' in item:

        # apply the REREPLACEMENTS substitutions on the serialized filter.
        clin_tmp = json.dumps(item['clinical_filter'])
        for key, val in REREPLACEMENTS.items():
            clin_tmp = clin_tmp.replace(key, val)

        c = json.loads(clin_tmp)

        if 'GENDER' in item['clinical_filter']:
            clin_txt_2_gender = item['clinical_filter']['GENDER']

        if 'BIRTH_DATE' in item['clinical_filter']:
            # only the first operator/value pair of the expression is used.
            op = next(iter(item['clinical_filter']['BIRTH_DATE'].keys()))
            val = next(iter(item['clinical_filter']['BIRTH_DATE'].values()))

            try:
                val = datetime.datetime.strptime(val.replace(" GMT", ""),
                                                 '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                # not in the HTTP-date layout above: fall back to dateutil.
                val = dateutil.parser.parse(val)

            # compute the age.
            # NOTE(review): the comparison uses ``val.day - 1`` -- confirm the
            # one-day shift is intended.
            today = datetime.date.today()
            tmp = today.year - val.year - ((today.month, today.day) <
                                           (val.month, val.day - 1))
            val = tmp

            # a "gte" operator on the birth date is described as "< age".
            if op.count("gte") > 0:
                clin_txt_2_age = "< %s" % val
            else:
                clin_txt_2_age = "> %s" % val

        # parse date-times.
        for key in ['BIRTH_DATE', 'REPORT_DATE']:

            if key not in c:
                continue

            # extract the expression value.
            lkey, lval = next(iter(c[key].keys())), next(iter(c[key].values()))

            try:
                c[key][lkey] = datetime.datetime.strptime(
                    lval.replace(" GMT", ""), '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                c[key][lkey] = dateutil.parser.parse(lval)

        # expand oncotree
        if 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME' in item['clinical_filter']:

            txt = item['clinical_filter']['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']

            if txt == "_LIQUID_" or txt == "_SOLID_":

                # liquid = everything under the "Lymph" and "Blood" nodes.
                node1 = oncotreenx.lookup_text(onco_tree, "Lymph")
                node2 = oncotreenx.lookup_text(onco_tree, "Blood")

                nodes1 = list(nx.dfs_tree(onco_tree, node1))
                nodes2 = list(nx.dfs_tree(onco_tree, node2))
                nodes = list(set(nodes1).union(set(nodes2)))

                if txt == "_SOLID_":

                    # solid = every node that is not a liquid node.
                    all_nodes = set(list(onco_tree.nodes()))
                    tmp_nodes = all_nodes - set(nodes)
                    nodes = list(tmp_nodes)

                clin_txt_1 = "%s cancers" % txt.replace("_", "").title()

            else:

                clin_txt_1 = "%s" % txt
                node = oncotreenx.lookup_text(onco_tree, txt)

                # A diagnosis that is not in the oncotree expands to no nodes.
                # Without this default ``nodes`` was unbound below and the
                # function failed with UnboundLocalError.
                nodes = []
                if onco_tree.has_node(node):
                    nodes = list(nx.dfs_tree(onco_tree, node))

            nodes_txt = [onco_tree.node[n]['text'] for n in nodes]
            c['ONCOTREE_PRIMARY_DIAGNOSIS_NAME'] = {'$in': nodes_txt}

    g = {}
    gen_txt = []
    if 'genomic_filter' in item:

        # apply the REREPLACEMENTS substitutions on the serialized filter.
        gen_tmp = json.dumps(item['genomic_filter'])
        for key, val in REREPLACEMENTS.items():
            gen_tmp = gen_tmp.replace(key, val)

        g = json.loads(gen_tmp)

        # add TRUE_HUGO_SYMBOL value mutational signature filter queries
        if 'TRUE_HUGO_SYMBOL' in g and g['TRUE_HUGO_SYMBOL'] == {'$in': ['']}:
            g['TRUE_HUGO_SYMBOL'] = None

        # work out which variant categories the filter asks for.
        sv_test = False
        mut_test = False
        cnv_test = False
        if 'VARIANT_CATEGORY' in item['genomic_filter']:
            variant_category = item['genomic_filter']['VARIANT_CATEGORY']
            if isinstance(variant_category, dict):
                for x in variant_category.values():
                    if "SV" in set(x):
                        sv_test = True
                    if "CNV" in set(x):
                        cnv_test = True
                    if "MUTATION" in set(x):
                        mut_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'SV':
                sv_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'CNV':
                cnv_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'MUTATION':
                mut_test = True

        # build text.
        exon_txt = ""
        protein_txt = ""
        if mut_test:
            gen_txt.append("Mutation")

            if 'TRUE_EXON_CHANGE' in item['genomic_filter']:
                exon_txt = item['genomic_filter']['TRUE_EXON_CHANGE']

            if 'TRUE_PROTEIN_CHANGE' in item['genomic_filter']:
                protein_txt = item['genomic_filter']['TRUE_PROTEIN_CHANGE']

        if cnv_test:
            if 'CNV_CALL' in g:
                if isinstance(g['CNV_CALL'], dict):
                    gen_txt += next(iter(g['CNV_CALL'].values()))
                else:
                    gen_txt.append(g['CNV_CALL'])
        if sv_test:
            gen_txt.append("Structural rearrangement")

        if 'MMR_STATUS' in item['genomic_filter']:
            gen_txt.append(item['genomic_filter']['MMR_STATUS'])

        if 'TABACCO_STATUS' in item['genomic_filter']:
            gen_txt.append('Tobacco Mutational Signature')

        if 'TEMOZOLOMIDE_STATUS' in item['genomic_filter']:
            gen_txt.append('Temozolomide Mutational Signature')

        if 'POLE_STATUS' in item['genomic_filter']:
            gen_txt.append('PolE Mutational Signature')

        if 'APOBEC_STATUS' in item['genomic_filter']:
            gen_txt.append('APOBEC Mutational Signature')

        if 'UVA_STATUS' in item['genomic_filter']:
            gen_txt.append('UVA Mutational Signature')

        # one query clause per requested variant category; OR-ed together below.
        clauses = []
        if mut_test:

            clause = {
                'VARIANT_CATEGORY': 'MUTATION',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL']
            }

            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']

            if 'TRUE_PROTEIN_CHANGE' in g:
                clause['TRUE_PROTEIN_CHANGE'] = g['TRUE_PROTEIN_CHANGE']

            clauses.append(clause)

        if cnv_test:

            clause = {
                'VARIANT_CATEGORY': 'CNV',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL'],
            }

            if 'CNV_CALL' in g:
                clause['CNV_CALL'] = g['CNV_CALL']

            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']

            clauses.append(clause)

        if sv_test:

            true_hugo = item['genomic_filter']['TRUE_HUGO_SYMBOL']

            if isinstance(true_hugo, dict):
                genes = next(iter(true_hugo.values()))
            else:
                genes = [true_hugo]

            # also search for the known synonyms of every requested gene.
            to_add = list()
            for gene in genes:
                if gene in synonyms:
                    to_add += synonyms[gene]

            genes = genes + to_add

            # match the gene as a stand-alone word anywhere in the comment.
            abc = '|'.join([
                rf"(.*\W{gene}\W.*)|(^{gene}\W.*)|(.*\W{gene}$)"
                for gene in genes
            ])

            clauses.append({'STRUCTURAL_VARIANT_COMMENT': {"$regex": abc}})
            clauses.append({'LEFT_PARTNER_GENE': {'$in': genes}})
            clauses.append({'RIGHT_PARTNER_GENE': {'$in': genes}})

        if len(clauses) > 0:
            g = {"$or": clauses}

        # copy every remaining filter key; the special ones live in the clauses.
        for key in item['genomic_filter']:

            special_clauses = {
                'STRUCTURAL_VARIANT_COMMENT', 'VARIANT_CATEGORY',
                'TRUE_HUGO_SYMBOL', 'CNV_CALL', 'WILDTYPE',
                'TRUE_PROTEIN_CHANGE'
            }
            if key in special_clauses:
                continue

            g[key] = item['genomic_filter'][key]

        # NOTE(review): presumably converts "GMT" date strings inside ``g`` in
        # place -- confirm against get_recursively.
        get_recursively(g, "GMT")
        if 'TRUE_HUGO_SYMBOL' in item['genomic_filter']:
            if isinstance(item['genomic_filter']['TRUE_HUGO_SYMBOL'], dict):
                genes = next(
                    iter(item['genomic_filter']['TRUE_HUGO_SYMBOL'].values()))
            else:
                genes = [item['genomic_filter']['TRUE_HUGO_SYMBOL']]

            genes = [str(i) for i in genes]
            genes = ', '.join(genes)

            # from here on ``gen_txt`` is a string rather than a list of labels.
            if len(gen_txt) > 1:
                gen_txt = "%s: %s" % (genes, ', '.join(gen_txt))
            else:

                if exon_txt == "" and protein_txt == "":
                    gen_txt = "%s %s" % (genes, ', '.join(gen_txt))
                elif exon_txt != "":
                    gen_txt = "%s exon %s" % (genes, exon_txt)
                else:
                    gen_txt = "%s %s" % (genes, protein_txt)

    return c, g, (gen_txt, [clin_txt_1, clin_txt_2_age, clin_txt_2_gender])
Ejemplo n.º 11
0
def prepare_criteria(item):
    """
    Translate a stored filter item into clinical/genomic queries and display text.

    :param item: mapping that may contain a ``clinical_filter`` and/or a
                 ``genomic_filter`` entry
    :return: tuple ``(c, g, (gen_txt, [clin_txt_1, clin_txt_2_age, clin_txt_2_gender]))``

             * ``c`` -- clinical query dict (``{}`` without a clinical filter)
             * ``g`` -- genomic query dict (``{}`` without a genomic filter)
             * ``gen_txt`` -- genomic description: a string when the genomic
               filter has a ``TRUE_HUGO_SYMBOL``, otherwise a list of labels
             * ``clin_txt_1`` -- diagnosis description
             * ``clin_txt_2_age`` / ``clin_txt_2_gender`` -- age and gender text
    """

    onco_tree = oncotreenx.build_oncotree(settings.DATA_ONCOTREE_FILE)

    c = {}
    clin_txt_1 = ""
    clin_txt_2_gender = ""
    clin_txt_2_age = ""
    if 'clinical_filter' in item:

        # apply the REREPLACEMENTS substitutions on the serialized filter.
        clin_tmp = json.dumps(item['clinical_filter'])
        for key, val in REREPLACEMENTS.items():
            clin_tmp = clin_tmp.replace(key, val)

        c = json.loads(clin_tmp)

        if 'GENDER' in item['clinical_filter']:
            clin_txt_2_gender = item['clinical_filter']['GENDER']

        if 'BIRTH_DATE' in item['clinical_filter']:
            # only the first operator/value pair of the expression is used.
            # next(iter(...)) instead of [0]: dict views are not indexable on
            # Python 3, while this form works on both Python 2 and 3.
            op = next(iter(item['clinical_filter']['BIRTH_DATE'].keys()))
            val = next(iter(item['clinical_filter']['BIRTH_DATE'].values()))

            try:
                val = datetime.datetime.strptime(val.replace(" GMT", ""), '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                # not in the HTTP-date layout above: fall back to dateutil.
                val = dateutil.parser.parse(val)

            # compute the age.
            # NOTE(review): the comparison uses ``val.day - 1`` -- confirm the
            # one-day shift is intended.
            today = datetime.date.today()
            tmp = today.year - val.year - ((today.month, today.day) < (val.month, val.day - 1))
            val = tmp

            # a "gte" operator on the birth date is described as "< age".
            if op.count("gte") > 0:
                clin_txt_2_age = "< %s" % val
            else:
                clin_txt_2_age = "> %s" % val

        # parse date-times.
        for key in ['BIRTH_DATE', 'REPORT_DATE']:

            if key not in c:
                continue

            # extract the expression value.
            lkey, lval = next(iter(c[key].keys())), next(iter(c[key].values()))

            try:
                c[key][lkey] = datetime.datetime.strptime(lval.replace(" GMT", ""), '%a, %d %b %Y %H:%M:%S')
            except ValueError:
                c[key][lkey] = dateutil.parser.parse(lval)

        # expand oncotree
        if 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME' in item['clinical_filter']:

            txt = item['clinical_filter']['ONCOTREE_PRIMARY_DIAGNOSIS_NAME']

            if txt == "_LIQUID_" or txt == "_SOLID_":

                # liquid = everything under the "Lymph" and "Blood" nodes.
                node1 = oncotreenx.lookup_text(onco_tree, "Lymph")
                node2 = oncotreenx.lookup_text(onco_tree, "Blood")

                nodes1 = list(nx.dfs_tree(onco_tree, node1))
                nodes2 = list(nx.dfs_tree(onco_tree, node2))
                nodes = list(set(nodes1).union(set(nodes2)))

                if txt == "_SOLID_":

                    # solid = every node that is not a liquid node.
                    all_nodes = set(list(onco_tree.nodes()))
                    tmp_nodes = all_nodes - set(nodes)
                    nodes = list(tmp_nodes)

                clin_txt_1 = "%s cancers" % txt.replace("_", "").title()

            else:

                clin_txt_1 = "%s" % txt
                node = oncotreenx.lookup_text(onco_tree, txt)

                # A diagnosis that is not in the oncotree expands to no nodes.
                # Without this default ``nodes`` was unbound below and the
                # function failed with UnboundLocalError.
                nodes = []
                if onco_tree.has_node(node):
                    nodes = list(nx.dfs_tree(onco_tree, node))

            nodes_txt = [onco_tree.node[n]['text'] for n in nodes]
            c['ONCOTREE_PRIMARY_DIAGNOSIS_NAME'] = {'$in': nodes_txt}

    g = {}
    gen_txt = []
    if 'genomic_filter' in item:

        # apply the REREPLACEMENTS substitutions on the serialized filter.
        gen_tmp = json.dumps(item['genomic_filter'])
        for key, val in REREPLACEMENTS.items():
            gen_tmp = gen_tmp.replace(key, val)

        g = json.loads(gen_tmp)

        # add TRUE_HUGO_SYMBOL value mutational signature filter queries
        if 'TRUE_HUGO_SYMBOL' in g and g['TRUE_HUGO_SYMBOL'] == {'$in': ['']}:
            g['TRUE_HUGO_SYMBOL'] = None

        # work out which variant categories the filter asks for.
        sv_test = False
        mut_test = False
        cnv_test = False
        if 'VARIANT_CATEGORY' in item['genomic_filter']:
            variant_category = item['genomic_filter']['VARIANT_CATEGORY']
            if isinstance(variant_category, dict):
                for x in variant_category.values():
                    if "SV" in set(x):
                        sv_test = True
                    if "CNV" in set(x):
                        cnv_test = True
                    if "MUTATION" in set(x):
                        mut_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'SV':
                sv_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'CNV':
                cnv_test = True

            elif item['genomic_filter']['VARIANT_CATEGORY'] == 'MUTATION':
                mut_test = True

        # build text.
        exon_txt = ""
        protein_txt = ""
        if mut_test:
            gen_txt.append("Mutation")

            if 'TRUE_EXON_CHANGE' in item['genomic_filter']:
                exon_txt = item['genomic_filter']['TRUE_EXON_CHANGE']

            if 'TRUE_PROTEIN_CHANGE' in item['genomic_filter']:
                protein_txt = item['genomic_filter']['TRUE_PROTEIN_CHANGE']

        if cnv_test:
            if 'CNV_CALL' in g:
                if isinstance(g['CNV_CALL'], dict):
                    gen_txt += next(iter(g['CNV_CALL'].values()))
                else:
                    gen_txt.append(g['CNV_CALL'])
        if sv_test:
            gen_txt.append("Structural rearrangement")

        if 'MMR_STATUS' in item['genomic_filter']:
            gen_txt.append(item['genomic_filter']['MMR_STATUS'])

        if 'TABACCO_STATUS' in item['genomic_filter']:
            gen_txt.append('Tobacco Mutational Signature')

        if 'TEMOZOLOMIDE_STATUS' in item['genomic_filter']:
            gen_txt.append('Temozolomide Mutational Signature')

        if 'POLE_STATUS' in item['genomic_filter']:
            gen_txt.append('PolE Mutational Signature')

        if 'APOBEC_STATUS' in item['genomic_filter']:
            gen_txt.append('APOBEC Mutational Signature')

        if 'UVA_STATUS' in item['genomic_filter']:
            gen_txt.append('UVA Mutational Signature')

        # one query clause per requested variant category; OR-ed together below.
        clauses = []
        if mut_test:

            clause = {
                'VARIANT_CATEGORY': 'MUTATION',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL']
            }

            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']

            if 'TRUE_PROTEIN_CHANGE' in g:
                clause['TRUE_PROTEIN_CHANGE'] = g['TRUE_PROTEIN_CHANGE']

            clauses.append(clause)

        if cnv_test:

            clause = {
                'VARIANT_CATEGORY': 'CNV',
                'TRUE_HUGO_SYMBOL': g['TRUE_HUGO_SYMBOL'],
            }

            if 'CNV_CALL' in g:
                clause['CNV_CALL'] = g['CNV_CALL']

            if 'WILDTYPE' in g:
                clause['WILDTYPE'] = g['WILDTYPE']

            clauses.append(clause)

        if sv_test:

            true_hugo = item['genomic_filter']['TRUE_HUGO_SYMBOL']

            if isinstance(true_hugo, dict):
                genes = next(iter(true_hugo.values()))
            else:
                genes = [true_hugo]

            # also search for the known synonyms of every requested gene.
            to_add = list()
            for gene in genes:
                if gene in synonyms:
                    to_add += synonyms[gene]

            genes = genes + to_add

            # one case-insensitive pattern per gene, matching the gene as a
            # stand-alone word anywhere in the comment. Raw string so that
            # ``\W`` is not treated as an (invalid) string escape.
            sv_clauses = []
            for gene in genes:
                abc = r"(.*\W%s\W.*)|(^%s\W.*)|(.*\W%s$)" % (gene, gene, gene)
                sv_clauses.append(re.compile(abc, re.IGNORECASE))

            clause = {
                'STRUCTURAL_VARIANT_COMMENT': {"$in": sv_clauses}
            }
            clauses.append(clause)

        if len(clauses) > 0:
            g = {
                "$or": clauses
            }

        # copy every remaining filter key; the special ones live in the clauses.
        for key in item['genomic_filter']:

            special_clauses = {
                'STRUCTURAL_VARIANT_COMMENT',
                'VARIANT_CATEGORY',
                'TRUE_HUGO_SYMBOL',
                'CNV_CALL',
                'WILDTYPE',
                'TRUE_PROTEIN_CHANGE'
            }
            if key in special_clauses:
                continue

            g[key] = item['genomic_filter'][key]

        # NOTE(review): presumably converts "GMT" date strings inside ``g`` in
        # place -- confirm against get_recursively.
        get_recursively(g, "GMT")
        if 'TRUE_HUGO_SYMBOL' in item['genomic_filter']:
            if isinstance(item['genomic_filter']['TRUE_HUGO_SYMBOL'], dict):
                genes = next(iter(item['genomic_filter']['TRUE_HUGO_SYMBOL'].values()))
            else:
                genes = [item['genomic_filter']['TRUE_HUGO_SYMBOL']]

            genes = [str(i) for i in genes]
            genes = ', '.join(genes)

            # from here on ``gen_txt`` is a string rather than a list of labels.
            if len(gen_txt) > 1:
                gen_txt = "%s: %s" % (genes, ', '.join(gen_txt))
            else:

                if exon_txt == "" and protein_txt == "":
                    gen_txt = "%s %s" % (genes, ', '.join(gen_txt))
                elif exon_txt != "":
                    gen_txt = "%s exon %s" % (genes, exon_txt)
                else:
                    gen_txt = "%s %s" % (genes, protein_txt)

    return c, g, (gen_txt, [clin_txt_1, clin_txt_2_age, clin_txt_2_gender])
Ejemplo n.º 12
0
def autocomplete_query():
    """
    Return autocomplete suggestions for one field of a resource as JSON.

    The resource and field come from ``parse_resource_field()``; the text typed
    so far is the ``value`` request argument and an optional ``gene`` argument
    narrows database lookups to records of that gene.

    * clinical ``ONCOTREE_PRIMARY_DIAGNOSIS_NAME``: case-insensitive substring
      match on the oncotree node texts; every hit also contributes its direct
      predecessors and successors, and the ``root`` node is dropped.
    * ``string`` fields: database regex search for ``value`` as literal text.
    * ``integer`` fields: database prefix match on the digits in ``value``.

    :return: the value from ``parse_resource_field()`` for bad arguments; a 400
             ``Response`` when ``value`` is missing, is not numeric for an
             integer field, or the field type is unsupported; otherwise a 200
             ``Response`` with ``resource``, ``field`` and ``values``
    """
    # function-scope import so the block does not depend on the module imports.
    import re

    def error_response(message):
        # same JSON error shape for every rejected request.
        data = json.dumps({'error': message})
        return Response(response=data,
                        status=400,
                        mimetype="application/json")

    # parse parameters
    status, val = parse_resource_field()

    # parse the value.
    value = request.args.get("value")
    gene = request.args.get("gene")

    # bad args.
    if status == 1:
        return val

    # good args.
    resource, field = val

    # without a value there is nothing to complete; previously this crashed
    # on ``value.lower()`` or searched for the literal text "None".
    if value is None:
        return error_response('missing required parameter: value')

    # get the type.
    if resource == "genomic":
        schema = data_model.genomic_schema[field]['type']

    else:
        schema = data_model.clinical_schema[field]['type']

    # special cases.
    if resource == 'clinical' and field == 'ONCOTREE_PRIMARY_DIAGNOSIS_NAME':

        # make oncotree.
        onco_tree = oncotreenx.build_oncotree(settings.DATA_ONCOTREE_FILE)

        # loop over every-node and do text match.
        needle = value.lower()
        hit_set = set()
        for n in onco_tree.nodes():

            #TODO: Verify this doesn't need a decode
            if needle in onco_tree.node[n]['text'].lower():

                # the hit itself plus its direct predecessors and successors
                hit_set.add(n)
                hit_set.update(onco_tree.predecessors(n))
                hit_set.update(onco_tree.successors(n))

        # remove root.
        hit_set.discard('root')

        # convert to full text.
        results = [onco_tree.node[n]['text'] for n in hit_set]

    else:

        # only support string and integer.
        if schema not in ('string', 'integer'):
            return error_response('unsupported field type: %s' % schema)

        # an optional gene restricts the hits to records of that gene.
        gene_clause = {"$or": [
            {'TRUE_HUGO_SYMBOL': gene},
            {'CNV_HUGO_SYMBOL': gene},
        ]}

        # handle string.
        db = app.data.driver.db
        if schema == "string":

            # finalize search term; the user text is escaped so that it is
            # matched literally and cannot inject regex syntax.
            term = '.*%s.*' % re.escape(value)
            field_clause = {field: {'$regex': term, '$options': '-i'}}

            # first make query.
            if gene is None:
                query = db[resource].find(field_clause)
            else:
                query = db[resource].find({"$and": [field_clause, gene_clause]})

        else:

            # ``value`` ends up inside a JavaScript expression evaluated by the
            # database, so only an optional sign followed by digits is allowed.
            if not re.match(r'-?\d*\Z', value):
                return error_response('invalid integer value: %s' % value)

            # finalize the search term.
            term = "/^%s.*/.test(this.%s)" % (value, field)

            # first make the query
            if gene is None:
                query = db[resource].find({"$and": [
                    {'$where': term},
                ]})

            else:
                query = db[resource].find({"$and": [
                    {'$where': term},
                    gene_clause,
                ]})

        # extract distinct from query
        results = query.distinct(field)

    # remove None; the list is only rebuilt when a None had to be removed.
    tmp = set(results)
    if None in tmp:
        tmp.remove(None)
        results = list(tmp)

    # encode response.
    data = json.dumps({'resource': resource, 'field': field, 'values': results})
    resp = Response(response=data,
                    status=200,
                    mimetype="application/json")

    return resp
Ejemplo n.º 13
0
    def test_expand_liquid_oncotree(self):
        """'Leukemia' is in the first list from expand_liquid_oncotree and not in the second."""
        tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)

        liquid_txt, solid_txt = expand_liquid_oncotree(tree)

        assert 'Leukemia' in liquid_txt
        assert 'Leukemia' not in solid_txt
Ejemplo n.º 14
0
    def test_expand_liquid_oncotree(self):
        """'Leukemia' is in the first list from expand_liquid_oncotree and not in the second."""
        tree = oncotreenx.build_oncotree(file_path=TUMOR_TREE)

        liquid_txt, solid_txt = expand_liquid_oncotree(tree)

        assert 'Leukemia' in liquid_txt
        assert 'Leukemia' not in solid_txt
Ejemplo n.º 15
0
 def setup(self):
     """Build the oncotree from ``data/tumor_types.txt`` and keep it on ``self.g``."""
     tree = build_oncotree(file_path="data/tumor_types.txt")
     self.g = tree