Beispiel #1
0
def file_state(file):
    """Build the ``sm.DoAll`` sequence used to load ``file``.

    The steps, in order: store ``sm.ValueState(file)`` under
    ``"loaded_file"``, run ``CheckHeader()`` and ``CheckTypes()``, assign
    ``Done()`` to ``"dataframe"``, prompt for ``"env_name"``, and finally
    call ``sm.AddToIrisEnv("env_name", "dataframe")``.

    Args:
        file: Value wrapped in ``sm.ValueState``; presumably the uploaded
            file object or its contents -- confirm against the caller.

    Returns:
        The ``sm.DoAll`` instance wrapping the six steps.
    """
    steps = [
        sm.Assign("loaded_file", sm.ValueState(file)),
        CheckHeader(),
        CheckTypes(),
        sm.Assign("dataframe", Done()),
        sm.Assign(
            "env_name",
            t.String("Where would you like to save the dataframe?"),
        ),
        sm.AddToIrisEnv("env_name", "dataframe"),
    ]
    return sm.DoAll(steps)
class QueryDiseaseTreatments(IrisCommand):
    """Iris command that asks GNBR which treatments exist for a disease."""

    # What Iris will call the command and how it appears in a hint.
    title = "What drugs treat {disease}?"

    # Example phrasings that help Iris recognize the command.
    examples = [
        "What are the treatments for {disease}?", "How is {disease} treated?"
    ]

    # Type annotation per argument; the prompt is used to collect a missing
    # value from the user.
    argument_types = {
        "disease": t.String("What disease do you want to get treatments for?")
    }

    def command(self, disease):
        """Run the GNBR treatment query for ``disease``.

        Returns:
            ``[results, disease]`` where ``results`` is whatever
            ``GNBR_api.query_treatments_for_disease`` returned (a list of
            lists according to the original author's note).
        """
        results = GNBR_api.query_treatments_for_disease(disease)
        return [results, disease]

    def explanation(self, result):
        """Turn the output of ``command`` into chat bubbles.

        Args:
            result: The ``[results, disease]`` pair produced by ``command``.

        Returns:
            A list in which each element is one chat bubble: a single
            "nothing found" message when ``results`` is empty, otherwise a
            headline followed by an ``IrisDataframe`` holding the rows.
        """
        results, disease = result

        # The previous version also built a dataframe name from the disease
        # here but never used it; that dead computation has been dropped.
        if len(results) == 0:
            return ['No treatments identified in GNBR']

        treatment_data = iris_objects.IrisDataframe(
            column_names=["Phenotype", "Mesh ID", "Frequency of Annotation"],
            column_types=["Text", "Text", "Text"],
            data=results)

        return [
            'The following treatments were identified in GNBR for %s' %
            disease,
            treatment_data,
        ]
Beispiel #3
0
class PathwayToPhenotype(IrisCommand):
    """Placeholder command: pathway defects versus a phenotype.

    The command is not implemented; it only echoes the two arguments back
    inside a fixed message.
    """

    title = "How do defects in pathway affect phenotype?"
    examples = [
        "How do defects in {pathway} affect {phenotype}?",
        "Why are defs in {pathway} deleterious?",
        "How is {pathway} involved in the pathogenesis of {phenotype}",
    ]
    argument_types = {
        "pathway": t.String("What pathway are you interested in?"),
        "phenotype": t.String("What diseases do you want to analyze?"),
    }

    def command(self, pathway, phenotype):
        """Return the "not implemented" message naming both arguments."""
        template = (
            "Currently not implemented, but I also would like to know "
            "how defects in %s affect %s"
        )
        return template % (pathway, phenotype)

    def explanation(self, result):
        """Pass the message from ``command`` through unchanged."""
        return result
Beispiel #4
0
class ToNumber(IrisCommand):
    """Iris command that converts its argument to a ``float``."""

    title = "{x} to number"
    examples = []
    argument_types = {"x": t.String("What to convert to a number?")}

    def command(self, x):
        """Convert ``x`` with ``float``.

        Returns:
            The converted value, or ``None`` when ``x`` cannot be converted.
        """
        try:
            return float(x)
        # Only conversion failures map to None. A bare ``except`` would also
        # swallow KeyboardInterrupt and SystemExit.
        except (TypeError, ValueError, OverflowError):
            return None

    def explanation(self, result):
        """Return the conversion result unchanged."""
        return result
Beispiel #5
0
class SplitStringCommas(IrisCommand):
    """Iris command that splits a string at every comma."""

    title = "split string {str} on commas"
    examples = ["splitting string"]
    argument_types = {
        "str": t.String("What string do you want to split on commas?"),
    }

    # NOTE: the parameter shadows the builtin ``str``; the name is kept
    # because it matches the placeholder in ``title`` and the key in
    # ``argument_types``.
    def command(self, str):
        """Return the pieces of ``str`` separated by ``","``."""
        pieces = str.split(",")
        return pieces

    def explanation(self, result):
        """Wrap the list of pieces in a one-element list."""
        wrapped = [result]
        return wrapped
class CountCharacters(IrisCommand):
    """Iris command that reports the length of a string."""

    title = "count characters in {string}"
    examples = ["{string} length", "char in {string}"]
    argument_types = {
        "string": t.String("What string to count characters for?"),
    }

    def command(self, string):
        """Return ``len(string)``."""
        n_chars = len(string)
        return n_chars

    def explanation(self, result):
        """Return the count unchanged."""
        return result
Beispiel #7
0
class CountWords(IrisCommand):
    """Iris command that counts whitespace-separated words in a string."""

    title = "count words in {doc}"
    examples = ["count words {doc}", "word count {doc}"]
    argument_types = {
        "doc": t.String("What is the string you want to anaylze?"),
    }

    def command(self, doc):
        """Return how many tokens ``doc.split()`` yields."""
        words = doc.split()
        return len(words)

    def explanation(self, result):
        """Format the count as a short sentence."""
        template = "{} words in doc"
        return template.format(result)
Beispiel #8
0
class SearchKnowledgeSourceTitles(IrisCommand):
    """Iris command that searches SmartAPI titles for a query term."""

    title = "What knowledge source titles include {query}?"
    examples = ["What knowledge sources titles contain {query}?"]

    argument_types = {"query": t.String("What is the search term?")}

    def command(self, query):
        """Return ``SmartAPI().search_titles(query)``.

        The original author notes that the result is a list.
        """
        return SmartAPI.SmartAPI().search_titles(query)

    def explanation(self, result):
        """Return the matches, or a fixed message when there are none."""
        if len(result) == 0:
            return 'No source titles found'
        return result
Beispiel #9
0
class SearchKnowledgeSourceFull(IrisCommand):
    """Iris command that runs a SmartAPI ``search_all`` for a query term."""

    title = "What knowledge sources include information about {query}?"
    examples = [
        "What knowledge sources discuss {query}?",
        "What sources in SmartAPI talk about {query}",
    ]

    argument_types = {"query": t.String("What is the search term?")}

    def command(self, query):
        """Return ``SmartAPI().search_all(query)``."""
        return SmartAPI.SmartAPI().search_all(query)

    def explanation(self, result):
        """Return the matches, or a fixed message when there are none."""
        if len(result) == 0:
            return 'No knowledge sources found'
        return result
Beispiel #10
0
class ResearchQuestion(IrisCommand):
    """Placeholder command for "how can I study X" questions.

    The command ignores its argument and answers with a fixed message.
    """

    title = "How can I study {question}?"
    examples = [
        "What are the best techniques I should use to study {question}?",
        "Which database can I use to study {question}",
        "Which R package can I use to study {question}",
    ]
    argument_types = {
        "question": t.String(
            "What is your research inquiry (clinical, biological, or otherwise)?"
        ),
    }

    def command(self, question):
        """Return the canned reply; ``question`` is not used."""
        reply = "That's a good question! Unfortunately we aren't magical enough to conjure up the anwer. But we'll definitely get back to you on that."
        return reply

    def explanation(self, result):
        """Return the canned reply unchanged."""
        return result
Beispiel #11
0
class ChanceDisease(IrisCommand):
    """Placeholder command for disease-risk questions.

    Both arguments are collected but ignored; the command answers with a
    fixed "not implemented" message.
    """

    title = "What are the chances that a person with conditions gets disease?"

    examples = [
        "What is the epidemiological risk {conditions} for {disease} and what are their effects on prognosis?",
        "What is my likelihood of getting {disease}",
        "How does {conditions} change my chances of getting {disease}?",
    ]
    argument_types = {
        "conditions": t.List(
            "What conditions (i.e. age, gender, comorbidities, etc.) do you want to factor into this calculation? Please enter them in separated by commas."
        ),
        "disease": t.String("What disease are you analyzing?"),
    }

    def command(self, conditions, disease):
        """Return the canned reply; neither argument is used."""
        reply = "Currently not implemented:( But we'll get on it as soon as possible."
        return reply

    def explanation(self, result):
        """Return the canned reply unchanged."""
        return result
Beispiel #12
0
class RiskFactorsToDisease(IrisCommand):
    """Placeholder command for questions about risk factors of a disease.

    Both arguments are collected but ignored; the command answers with a
    fixed "not implemented" message.
    """

    title = "What are the risks (environmental, genetic, etc.) that can lead to disease and why?"
    examples = [
        "Why is this risk bad for {disease}",
        "Why are these risks bad for {disease}",
        "Why should I not do {risks}",
        "Why should I stopping doing {risks}",
        "What are the {risks} (environmental, genetic, etc.) that can lead to {disease} and why?",
    ]
    argument_types = {
        "risks": t.List(
            "What risks do you want to know more about? Enter them in separated by commas"
        ),
        "disease": t.String("What disease are you analyzing?"),
    }

    def command(self, risks, disease):
        """Return the canned reply; neither argument is used."""
        reply = "Currently not implemented, but that sure is an important question."
        return reply

    def explanation(self, result):
        """Return the canned reply unchanged."""
        return result
Beispiel #13
0
class GeneticDiseaseProtection(IrisCommand):
    """Iris command listing genetic diseases most similar to a condition.

    ``command`` runs ``Q1_query``; ``explanation`` renders the top
    similarities and stores them in the Iris environment.
    """

    # What Iris will call the command and how it appears in a hint.
    title = "What genetic diseases might condition protect against?"

    # Example phrasings that help Iris recognize the command.
    examples = [
        "What does {condition} protect against",
        "Protection against genetic diseases",
        "What does {condition} compensate for",
        "Protection against genetic diesease",
        "condition protects",
        "why does condition protect",
        "How does {condition} buffer against genetic diseases harmful effects",
    ]

    # Type annotation per argument; the prompt is used to collect a missing
    # value from the user.
    argument_types = {
        "condition": t.String("What is the condition do you want to analyze?"),
    }

    def command(self, condition):
        """Run ``Q1_query`` and return ``[results, condition]``."""
        return [Q1_query(condition), condition]

    def explanation(self, result):
        """Turn the output of ``command`` into chat bubbles.

        Args:
            result: The ``[results, condition]`` pair from ``command``;
                ``results`` must expose ``error`` and ``top_similarities()``.

        Returns:
            A list in which each element is one chat bubble. On success it
            holds a headline and an ``IrisDataframe``, and that dataframe is
            also saved to the Iris environment.
        """
        results, condition = result

        # Environment key: "similarities_" plus at most the first five
        # characters of the condition, without spaces and in lowercase.
        df_name = ('similarities_' + condition[:5]).replace(" ", "").lower()

        if results.error is not None:
            return ['There was an error processing your request']

        similarities = results.top_similarities()
        if similarities is None:
            return ['No similarities could be computed']

        similarities_df = iris_objects.IrisDataframe(data=similarities)
        self.iris.add_to_env(df_name, similarities_df)
        return [
            'The following genetic diseases are the most semantically similar to your query',
            similarities_df,
        ]
Beispiel #14
0
class GeneticConditionDisease(IrisCommand):
    """Iris command listing genetic diseases that may protect against a condition.

    ``command`` runs ``Q1_query``; ``explanation`` renders the top
    similarities and stores them in the Iris environment.
    """

    # What Iris will call the command and how it appears in a hint.
    title = "What genetic disease might protect against {condition}?"

    # Example phrasings that help Iris recognize the command.
    examples = [
        "what protects against {condition}",
        "what genetic conditions might offer protection against {condition} and why",
        "protective mechanism of condition against disease",
        "protective condition",
        "protection against disease",
        "do any genetic diseases protect against {condition}",
    ]

    # Type annotation per argument; the prompt is used to collect a missing
    # value from the user.
    argument_types = {
        "condition": t.String("What is the condition do you want to analyze?"),
    }

    def command(self, condition):
        """Run ``Q1_query`` and return ``[condition, results]``."""
        return [condition, Q1_query(condition)]

    def explanation(self, result):
        """Turn the output of ``command`` into chat bubbles.

        Args:
            result: The ``[condition, results]`` pair from ``command``;
                ``results`` must expose ``error`` and ``top_similarities()``.

        Returns:
            A list in which each element is one chat bubble: an error
            message, a "no similarities" message, or the similarities
            ``IrisDataframe`` (which is also saved to the Iris environment).
        """
        condition, results = result

        # Environment key: "similarities_" plus at most the first five
        # characters of the condition, without spaces and in lowercase.
        df_name = ('similarities_' + condition[:5]).replace(" ", "").lower()

        if results.error is not None:
            return [
                'There was an error processing your request for %s' % condition
            ]

        similarities = results.top_similarities()
        if similarities is None:
            return ['No similarities could be computed']

        similarities_df = iris_objects.IrisDataframe(data=similarities)
        self.iris.add_to_env(df_name, similarities_df)
        return [similarities_df]
Beispiel #15
0
class QueryDrugTargets(IrisCommand):
    """Iris command that lists the gene targets of a drug.

    Target UniProt ids come from ChEMBL and are mapped to gene names through
    the UniProt helper.
    """

    # What Iris will call the command and how it appears in a hint.
    title = "What genes does drug target?"

    # Example phrasings that help Iris recognize the command.
    examples = [
        "What are the targets of {drug}",
        "What are the drug targets of {drug}",
        "Search ChEMBL for targets of {drug}"
    ]

    # Type annotation per argument; the prompt is used to collect a missing
    # value from the user.
    argument_types = {
        "drug": t.String("What drug do you want to get gene targets for?")
    }

    def command(self, drug):
        """Fetch the gene targets and return ``[results, drug]``."""
        results = self.fetch_gene_targets(drug)
        return [results, drug]

    def explanation(self, result):
        """Turn the output of ``command`` into chat bubbles.

        Args:
            result: The ``[results, drug]`` pair produced by ``command``.

        Returns:
            A list in which each element is one chat bubble: an error
            message, a "nothing found" message, or a headline followed by
            the comma-separated gene names.
        """
        results, drug = result

        # fetch_gene_targets reports failure by returning a plain string.
        # Without this guard the string would be joined character by
        # character ("E, r, r, o, r") under the success headline.
        if isinstance(results, str):
            return ['There was an error fetching drug targets for %s' % drug]

        if len(results) == 0:
            return ['No drug targets identified in ChEMBL']

        return [
            'The following genes were identified as drug targets in ChEMBL for %s'
            % drug,
            ', '.join(results),
        ]

    def fetch_gene_targets(self, drug):
        """Look up the gene names targeted by ``drug``.

        Gene names are collected for at most the first 101 UniProt ids; if
        more ids remain, a trailing ``"and N more"`` entry is appended.

        Returns:
            A numpy array of gene-name strings, or the string ``'Error'``
            when any lookup raises.
        """
        try:
            print("Fetching uniprot ids from ChEMBL for drug targets")
            uniprot_ids = QueryChEMBL.QueryChEMBL.get_target_uniprot_ids_for_drug(
                drug)

            print("Fetching gene names for uniprot ids")
            gene_results = np.array([])
            for count, u in enumerate(uniprot_ids):
                gene = QueryUniprot.QueryUniprot.uniprot_id_to_gene_name(u)
                # NOTE(review): list() would split a plain string into
                # characters; presumably the helper returns a collection of
                # names -- confirm.
                gene_results = np.append(gene_results, np.array(list(gene)))
                if count >= 100:
                    # ``count`` is the index of the id just handled, so
                    # ``count + 1`` ids are done. The previous code
                    # subtracted only ``count`` and over-reported by one.
                    remaining = len(uniprot_ids) - (count + 1)
                    if remaining >= 1:
                        gene_results = np.append(
                            gene_results,
                            np.array(["and %s more" % remaining]))
                    break

            print(gene_results)
            return gene_results
        except Exception as e:
            # Best-effort lookup: callers receive a sentinel string instead
            # of an exception.
            print("Error fetching gene targets: %s" % e)
            return 'Error'
Beispiel #16
0
class DrugGenes(IrisCommand):
    """Iris command reporting how a drug relates to a user-supplied gene list.

    ``command`` runs ``Q2_query`` with no disease and returns a dict;
    ``explanation`` turns that dict into chat bubbles and stores the full
    dataframes in the Iris environment.
    """

    title = "How does {drug} affect these list of {genes}? "
    examples = []
    argument_types = {
        "drug":
        t.String(
            "Okay, a couple more questions to set up this task. For confirmation: What is the drug you want to analyze?"
        ),
        "genes":
        t.List(
            "What genes do you want to analyze? (enter NCBI gene names separated by commas)"
        ),
        "bool_image":
        t.YesNo("Would you like to visualize the results as a diagram?",
                yes=True,
                no=False),
        "bool_pubmed":
        t.YesNo("Would you like to get the list of pubmed IDs for reference?",
                yes=True,
                no=False),
        "bool_other_disease":
        t.YesNo(
            "Would you like to know other diseases that can be affected by the given drug?",
            yes=True,
            no=False)
    }

    def command(self, drug, genes, bool_image, bool_pubmed,
                bool_other_disease):
        """Run the drug/gene query.

        Args:
            drug: Drug name entered by the user.
            genes: Gene names entered by the user.
            bool_image: Whether a diagram should be generated.
            bool_pubmed: Whether PubMed ids should be collected.
            bool_other_disease: Whether other indications of the drug
                should be looked up.

        Returns:
            A dict. It always carries ``'drug'``; it carries ``'error'``
            when ``Q2_query`` returned a string, and ``'other_disease'``
            when requested.
        """
        print('BEFORE QUERY!!!')
        options = Options(gen_image=bool_image,
                          gene_list=genes,
                          gen_pubmed=bool_pubmed)

        answer = Q2_query(drug, None, options)  # disease is None
        print('ran Q2 query!!!')

        # Q2_query reports a missing drug/disease by returning a string.
        if isinstance(answer, str):
            answer = {'error': answer}

        if bool_other_disease:
            answer["other_disease"] = run_main.find_drug_indications(drug)

        answer['drug'] = drug.strip()

        return answer

    def explanation(self, result):
        """Turn the dict from ``command`` into chat bubbles.

        Full dataframes are stored in the Iris environment under names that
        end in a suffix derived from the drug name; shortened versions are
        shown in the chat.

        Returns:
            The error string when ``result`` has an ``'error'`` key,
            otherwise a list in which each element is one chat bubble.
        """
        if "error" in result:
            return result['error']

        drug_name = result['drug']

        # Suffix: "_" + first three characters of the drug + "_gene", with
        # spaces turned into underscores and everything lowercased.
        query_name = "_" + (drug_name[:3] + "_GENE").replace(' ', '_').lower()
        print(query_name, "== query name")

        # Genes associated with the drug.
        query_statement = ('How does ' + drug_name + ' affect: ' +
                           ', '.join(result['disease_genes']) + '?')
        result_array = ['Here are your results for: %s' % query_statement]
        result_array.append(
            'Top genes found to be targetted by %s are below. Full dataset saved as drug_genes_{drug_disease}'
            % drug_name)
        drug_gene_term_object = iris_objects.IrisDataframe(
            data=result['drug_genes'])
        self.iris.add_to_env('drug_genes' + query_name, drug_gene_term_object)
        drug_gene_term_object_short = iris_objects.IrisDataframe(
            data=result['drug_genes_short'])
        result_array.append(drug_gene_term_object_short)
        print('added genes')

        # Significant GO terms. The "none found" message used to be appended
        # unconditionally right after the table, contradicting it; it is now
        # emitted only when no enrichment is present, mirroring the tissue
        # section below.
        if 'GOENRICH' in result:
            result_array.append(
                'Top significant GO terms associated with the drug-disease interaction are shown. Full dataset saved as go_terms_{drug_disease}'
            )
            go_term_object = iris_objects.IrisDataframe(
                data=result['GOENRICH'])
            self.iris.add_to_env('go_terms' + query_name, go_term_object)

            go_term_object_short = iris_objects.IrisDataframe(
                data=result['GOENRICH_short'])
            result_array.append(go_term_object_short)
        else:
            result_array.append('No significant GO terms found')
        print('added GO terms')

        # Tissues in which disease genes are differentially expressed.
        if 'tissue_df_dis' in result:
            result_array.append(
                'The most relevant tissues, in which disease genes are differentially expressed, are shown. Full dataset saved as tissues_{drug_disease} '
            )
            tissue_object_dis = iris_objects.IrisDataframe(
                data=result['tissue_df_dis'])
            tissue_object_dis_short = iris_objects.IrisDataframe(
                data=result['tissue_df_dis_short'])
            self.iris.add_to_env('tissues_disease' + query_name,
                                 tissue_object_dis)
            result_array.append(tissue_object_dis_short)
        else:
            result_array.append(
                'No differential tissue expression in disease state detected.')

        # Supporting PubMed ids; a string value is shown as-is.
        if "pubmed" in result:
            if isinstance(result["pubmed"], str):
                result_array.append(result["pubmed"])
            else:
                result_array.append(
                    "Following are PMIDs that support the interaction: Full dataset saved as pmid_{drug_disease}."
                )
                pmid_df_short = iris_objects.IrisDataframe(
                    data=result["pubmed_short"])
                pmid_df = iris_objects.IrisDataframe(data=result["pubmed"])
                self.iris.add_to_env('pmid' + query_name, pmid_df)

                result_array.append(pmid_df_short)
        print('added Pubmed terms')

        # Other diseases potentially affected by the drug.
        if "other_disease" in result:
            ph_genes_str, _ = result["other_disease"]
            # Flat tab-separated record stream, four fields per phenotype
            # (original note: prb, BH, ph, sig_genes).
            ph_genes_arr = ph_genes_str.split('\t')
            ph_genes_array_all = [
                ph_genes_arr[x:x + 4] for x in range(0, len(ph_genes_arr), 4)
            ]
            if len(ph_genes_arr) >= 4:
                result_array.extend([
                    'Top hits of diseases potentially impacted by %s. Full dataset saved as drug_indications_{drug_disease}.'
                    % drug_name,
                    'We queried the gene neighborhood of drug targets and found the following phenotypes to be significant. Here we list significant phenotypes in order of probability. Column headings are phenotype, probability, significance level cutoff, and a list of genes that support the relationship'
                ])

                ph_genes_array_all_iris = iris_objects.IrisDataframe(
                    column_names=[
                        "Phenotype", "probability",
                        "Benjamin Hochberg significance cutoff",
                        "list of genes"
                    ],
                    column_types=["Text", "Text", "Text", "Text"],
                    data=ph_genes_array_all)
                self.iris.add_to_env('drug_indications' + query_name,
                                     ph_genes_array_all_iris)

                # Only the first five records are shown in the chat.
                ph_genes_array_short = ph_genes_array_all[:5]
                ph_genes_array_short_iris = iris_objects.IrisDataframe(
                    column_names=[
                        "Phenotype", "Probability",
                        "Benjamin Hochberg significance cutoff",
                        "list of genes"
                    ],
                    column_types=["Text", "Text", "Text", "Text"],
                    data=ph_genes_array_short)
                result_array.append(ph_genes_array_short_iris)
            else:
                result_array.append('No other drug indications found')
        print('added other drug indications')

        # Diagram, if one was generated.
        if "image_file" in result:
            result_array.append('Diagram stored in: %s' % result["image_file"])
            # NOTE(review): the path is interpolated into a shell command
            # unquoted; paths containing spaces or shell metacharacters
            # will misbehave.
            os.system("open " + result["image_file"])
        print('added image')

        result_array.append(
            "Full dataframes are available for viewing using the command: print {dataframe_name}. See right side panel for more information."
        )
        result_array.append(
            "The suffix for the drug-disease interaction pair is: %s" %
            query_name)
        # ``results_dir`` is not defined in this class; presumably a
        # module-level name -- confirm.
        result_array.append("Results are also stored in: %s" % results_dir)

        return result_array