Exemple #1
0
def update_table(facet_dropdown, facet_range, facet_min, facet_max, dropdown_id, min_id, facets, graph, label):
    """Query vertices of ``label`` in ``graph``, filtered by the facet inputs.

    Args:
        facet_dropdown: selected value indices, one entry per dropdown.
        facet_range: one entry per numeric facet; the numeric filter is
            applied only when this entry is non-empty.
        facet_min: lower bounds, aligned with ``facet_range``.
        facet_max: upper bounds, aligned with ``facet_range``.
        dropdown_id: dicts with an ``'index'`` key, aligned with
            ``facet_dropdown``.
        min_id: dicts with an ``'index'`` key, aligned with ``facet_range``.
        facets: mapping of field name to a dict holding ``'type'``,
            ``'index'`` and, for STRING/BOOL facets, ``'values'``.
        graph: graph name, or None.
        label: vertex label, or None.

    Returns:
        A ``(data, query)`` pair built by ``results_data`` and
        ``query_string``; ``([], "V()")`` when graph or label is None.
    """
    print("Facet inputs", facet_dropdown, facet_range, facet_min, facet_max)
    if graph is None or label is None:
        return [], "V()"

    # Key every input by the 'index' of the component id it belongs to.
    facet_inputs = {}
    for f, v in zip(dropdown_id, facet_dropdown):
        facet_inputs[f['index']] = v
    for f, s, n, m in zip(min_id, facet_range, facet_min, facet_max):
        facet_inputs[f['index']] = [s, n, m]

    conn = connect()
    G = conn.graph(graph)
    q = G.query().V().hasLabel(label)
    # NOTE(review): numeric filters are also skipped when there are no
    # dropdowns at all; kept as in the original -- confirm this is intended.
    if facet_dropdown:
        for k, f in facets.items():
            if f['type'] in ["STRING", "BOOL"]:
                # A missing input or an unset (None/empty) selection means
                # "no filter" instead of raising KeyError/TypeError.
                selected = facet_inputs.get(f['index'])
                if selected:
                    # turn the dropdown selection numbers back into the original values
                    fset = [f['values'][j] for j in selected]
                    q = q.has(gripql.within(k, fset))
            elif f["type"] == "NUMERIC":
                entry = facet_inputs.get(f['index'])
                if entry and entry[0]:
                    q = q.has(gripql.inside(k, entry[1], entry[2]))
    return results_data(q), query_string(q)
Exemple #2
0
def test_has_within(O):
    """Exercise ``has(gripql.within(...))`` on the graph loaded by setupGraph.

    Runs one query on "occupation" with the list ["jedi", "sith"] and one
    with the scalar 0, and returns a list of error messages (empty when the
    returned vertices and counts match the expectations).
    """
    setupGraph(O)
    errors = []

    # List argument: exactly these three vertices are expected back.
    allowed_gids = ["vertex2", "vertex5", "vertex6"]
    seen = 0
    jedi_or_sith = O.query().V().has(gripql.within("occupation", ["jedi", "sith"]))
    for seen, vertex in enumerate(jedi_or_sith, start=1):
        if vertex['gid'] in allowed_gids:
            continue
        errors.append("Wrong vertex returned %s" % (vertex))
    if seen != 3:
        errors.append(
            "Fail: O.query().V().has(gripql.within(\"occupation\", [\"jedi\", \"sith\"])) %s != %s" %
            (seen, 3))

    # Scalar argument: no vertex is expected to match.
    scalar_hits = sum(1 for _ in O.query().V().has(gripql.within("occupation", 0)))
    if scalar_hits != 0:
        errors.append(
            "Fail: O.query().V().has(gripql.within(\"occupation\", 0)) %s != %s" %
            (scalar_hits, 0))

    return errors
Exemple #3
0
    def find_drugs_for_mutation_dataset(self, genes, dataset):
        """Return compound names whose response is higher in mutated samples.

        The aliquots reachable from ``Program:<DATASET>`` are split, per gene,
        into those with an ``AlleleCall`` edge on that gene (positive set)
        and the rest (negative set). ``act_area`` response values are then
        gathered per compound for both sets and compared with
        ``stats.ttest_ind(..., equal_var=False)``.

        Args:
            genes: iterable of gene symbols to look up on ``Gene`` vertices.
            dataset: program name; upper-cased to build the program vertex
                id and used unchanged to filter responses on ``source``.

        Returns:
            A list with one compound name per (compound, gene) pair that has
            more than five values in both sets, ``pvalue <= 0.05`` and a
            positive statistic. A compound can therefore appear more than
            once. Empty when no aliquot or no gene is found.
        """
        program = "Program:" + dataset.upper()

        q = self.O.query().V(program).in_("InProgram").in_("InProject").in_(
            "SampleFor").in_("AliquotFor").distinct("_gid")
        all_aliquots = [row.gid for row in q]
        if not all_aliquots:
            # Nothing to compare. This also avoids handing an empty id list
            # to V() (presumably not a "match nothing" filter -- TODO confirm).
            return []

        # Resolve each symbol to its Gene vertex gid; unknown symbols are dropped.
        gene_ids = {}
        for g in genes:
            for i in self.O.query().V().hasLabel("Gene").has(
                    gripql.eq("symbol", g)):
                gene_ids[g] = i.gid
        if not gene_ids:
            return []

        # Scan <dataset> cell lines based on mutation status
        mut_samples = {}
        norm_samples = {}

        q = self.O.query().V(all_aliquots).as_("sample").in_(
            "CallsetFor").outE("AlleleCall")
        q = q.has(gripql.within("ensembl_gene",
                                list(gene_ids.values()))).as_("variant")
        q = q.render({
            "sample": "$sample._gid",
            "gene": "$variant._data.ensembl_gene"
        })

        for res in q:
            mut_samples.setdefault(res.gene, set()).add(res.sample)

        # get dataset samples without mutation
        aliquot_set = set(all_aliquots)
        for i in gene_ids.values():
            # A gene with no mutated sample gets an empty positive set
            # instead of raising KeyError.
            positives = mut_samples.setdefault(i, set())
            norm_samples[i] = list(aliquot_set.difference(positives))

            print("%s Positive Set: %d" % (i, len(mut_samples[i])))
            print("%s Negative Set: %d" % (i, len(norm_samples[i])))

        compound = {}

        def collect_responses(sample_ids):
            """Map compound gid -> list of act_area values for sample_ids."""
            by_compound = {}
            if not sample_ids:
                return by_compound
            rq = self.O.query().V(list(sample_ids)).in_("ResponseIn").has(
                gripql.eq("source",
                          dataset)).as_("a").out("ResponseTo").as_("b").select(
                              ["a", "b"])
            for row in rq:
                if hasattr(row['a']['data'],
                           'act_area'):  # not all rows have 'act_area'
                    v = row['a']['data']['act_area']
                else:
                    v = 0

                drug_id = row['b']['gid']
                compound[drug_id] = row['b']['data']['name']
                by_compound.setdefault(drug_id, []).append(v)
            return by_compound

        # Response values for the positive set (samples with mutation) ...
        pos_response = {
            g: collect_responses(mut_samples[g])
            for g in gene_ids.values()
        }
        # ... and for the negative set (samples without mutation).
        neg_response = {
            g: collect_responses(norm_samples[g])
            for g in gene_ids.values()
        }

        # Collect t-test statistics
        drugs = set(itertools.chain.from_iterable(pos_response.values()))
        out = []
        for drug in drugs:
            for g in gene_ids.values():
                if drug not in pos_response[g] or drug not in neg_response[g]:
                    continue
                mut_values = pos_response[g][drug]
                norm_values = neg_response[g][drug]
                if len(mut_values) > 5 and len(norm_values) > 5:
                    s = stats.ttest_ind(mut_values,
                                        norm_values,
                                        equal_var=False)
                    if s.pvalue <= 0.05 and s.statistic > 0:  # means drug is significantly effective
                        out.append(compound[drug])

        # names of compounds
        return out
    def find_drugs_for_mutation_dataset(self, genes, dataset):
        """Return drug names whose response differs by mutation status.

        Cases reachable from ``Program:<DATASET>`` are split, per gene, into
        those with an ``alleles`` edge on that gene (positive set) and the
        rest (negative set). For datasets other than CCLE the mutation
        lookup goes through ``same_as`` cases in ``Project:CCLE``. ``aac``
        response values are gathered per compound for both sets and compared
        with ``stats.ttest_ind(..., equal_var=False)``.

        Args:
            genes: gene symbols to look up on ``Gene`` vertices.
            dataset: program name; upper-cased before use.

        Returns:
            A list with one drug name per (compound, gene) pair that has more
            than five values in both sets and ``pvalue <= 0.05``. A drug can
            therefore appear more than once. Empty when no case or no gene
            is found.
        """
        dataset = dataset.upper()
        program = "Program:" + dataset
        q = self.O.query().V(program).out("projects").out("cases").distinct(
            "_gid")

        all_cases = [row.gid for row in q]
        if not all_cases:
            # Nothing to compare. This also avoids handing an empty id list
            # to V() (presumably not a "match nothing" filter -- TODO confirm).
            return []

        gene_ids = {}
        for i in self.O.query().V().hasLabel("Gene").has(
                gripql.within("symbol", genes)):
            gene_ids[i.data.symbol] = i.gid
        if not gene_ids:
            return []

        mut_cases = {}
        norm_cases = {}

        q = self.O.query().V(all_cases).as_("ds")

        if dataset != "CCLE":
            q = q.out("same_as").has(gripql.eq("project_id", "Project:CCLE"))

        q = q.out("samples").out("aliquots").out("somatic_callsets")
        q = q.outE("alleles").has(
            gripql.within("ensembl_gene", list(gene_ids.values())))
        q = q.render({"case": "$ds._gid", "gene": "$._data.ensembl_gene"})

        for res in q:
            mut_cases.setdefault(res.gene, set()).add(res.case)

        # get cases without mutation
        case_set = set(all_cases)
        for i in gene_ids.values():
            # A gene with no mutated case gets an empty positive set instead
            # of raising KeyError.
            positives = mut_cases.setdefault(i, set())
            norm_cases[i] = list(case_set.difference(positives))

            print("%s Positive Set: %d" % (i, len(mut_cases[i])))
            print("%s Negative Set: %d" % (i, len(norm_cases[i])))

        names = {}

        # The original first chose "act_area"/"auc" by dataset and then
        # overwrote it unconditionally; only "aac" was ever used.
        area_metric = "aac"

        def collect_responses(case_ids):
            """Map compound gid -> list of area_metric values for case_ids."""
            by_compound = {}
            if not case_ids:
                return by_compound
            # NOTE(review): the mark "a" is set twice; the select below is
            # presumably meant to see the drug_response vertex -- confirm.
            rq = self.O.query().V(list(case_ids)).as_("a").out("samples").out(
                "aliquots")
            rq = rq.out("drug_response").as_("a").out("compounds").as_("b")
            rq = rq.select(["a", "b"])
            for row in rq:
                if hasattr(row["a"]["data"], area_metric):
                    v = row["a"]["data"][area_metric]
                else:
                    v = 0

                drug_id = row["b"]["gid"]
                names[drug_id] = get_drug_name_from_compound(row["b"]["data"])
                by_compound.setdefault(drug_id, []).append(v)
            return by_compound

        pos_response = {
            g: collect_responses(mut_cases[g])
            for g in gene_ids.values()
        }
        neg_response = {
            g: collect_responses(norm_cases[g])
            for g in gene_ids.values()
        }

        drugs = set(itertools.chain.from_iterable(pos_response.values()))
        out = []
        for drug in drugs:
            for g in gene_ids.values():
                if drug not in pos_response[g] or drug not in neg_response[g]:
                    continue
                mut_values = pos_response[g][drug]
                norm_values = neg_response[g][drug]
                if len(mut_values) > 5 and len(norm_values) > 5:
                    s = stats.ttest_ind(mut_values,
                                        norm_values,
                                        equal_var=False)
                    # No sign check here: any significant difference counts.
                    if s.pvalue <= 0.05:
                        out.append(names[drug])

        return out
Exemple #5
0
 def within(self, *args, **kwargs):
     """Forward every argument to ``gripql.within`` and return its result."""
     condition = gripql.within(*args, **kwargs)
     return condition