Python IrisDataframe Examples, iris.iris_objects.IrisDataframe Python Examples

Example #1

0

Show file

File: QueryDiseaseTreatments.py Project: johndpope/iris-ncats

    def explanation(self, result):
        """Build the user-facing explanation for a disease-treatment query.

        Args:
            result: A two-item sequence ``[results, disease]``. ``results``
                holds the treatment rows and ``disease`` is the name of the
                disease that was queried.

        Returns:
            A list of items to display. When ``results`` is empty the list
            holds a single "no treatments" message. Otherwise it holds an
            introductory sentence followed by an ``IrisDataframe`` with the
            columns "Phenotype", "Mesh ID" and "Frequency of Annotation".
        """
        results, disease = result

        # Nothing to tabulate: report that instead of building an empty table.
        if len(results) == 0:
            return ['No treatments identified in GNBR']

        treatment_data = iris_objects.IrisDataframe(
            column_names=["Phenotype", "Mesh ID", "Frequency of Annotation"],
            column_types=["Text", "Text", "Text"],
            data=results)

        return [
            'The following treatments were identified in GNBR for %s' %
            disease,
            treatment_data,
        ]

Example #2

0

Show file

File: fileupload.py Project: tchen0123/iris-agent

 def next_state_base(self, next):
     """Wrap the uploaded file's parsed contents in an IrisDataframe state.

     The dataframe is built from the loaded file's name plus the
     "headers", "types" and "data" entries of ``self.context``.

     Args:
         next: Not used by this method.

     Returns:
         The object returned by ``sm.ValueState(...).when_done(...)``,
         chained to this state's when-done state.
     """
     loaded_name = self.read_variable("loaded_file").name
     frame = iris_objects.IrisDataframe(
         loaded_name,
         self.context["headers"],
         self.context["types"],
         self.context["data"],
     )
     value_state = sm.ValueState(frame)
     return value_state.when_done(self.get_when_done_state())

Example #3

0

Show file

File: Median.py Project: tchen0123/iris-agent

 def command(self, array):
     """Return a one-row IrisDataframe of medians taken along axis 0.

     Args:
         array: Object exposing ``column_names``, ``column_types`` and
             ``to_matrix()``.

     Returns:
         An ``IrisDataframe`` that reuses the input's column names and
         types and whose single row is ``np.median(matrix, axis=0)``.
     """
     import numpy as np
     median_row = np.median(array.to_matrix(), axis=0)
     return iris_objects.IrisDataframe(column_names=array.column_names,
                                       column_types=array.column_types,
                                       data=[median_row])

Example #4

0

Show file

 def get_output(self):
     """Ask the user for column headers, showing one sample line of the file.

     The sample is the first line of the loaded file, or the second line
     when the "throw_away" variable is truthy. It is shown in a one-row
     frame with placeholder column names ("column 0", "column 1", ...).

     Returns:
         A two-item list: the prompt text and a "collection" payload
         holding the frame's spreadsheet data.
     """
     first_row = 1 if self.read_variable("throw_away") else 0
     file_lines = self.read_variable("loaded_file").content.split("\n")
     sample_data = split_line(file_lines[first_row])

     placeholder_names = [
         "column {}".format(position) for position, _ in enumerate(sample_data)
     ]
     placeholder_types = ["_" for _ in sample_data]
     dummy_frame = iris_objects.IrisDataframe(
         column_names=placeholder_names,
         column_types=placeholder_types,
         data=[sample_data])

     prompt = "What are the headers? Please enter a list of comma-separated values. I've provided a line of sample data below."
     payload = {
         "type": "collection",
         "value": dummy_frame.generate_spreadsheet_data(),
     }
     return [prompt, payload]

Example #5

0

Show file

 def next_state_base(self, text):
     """Show the per-column types computed from the first data line.

     The types come from ``rows_and_types(split_line(...))`` applied to the
     first entry of ``self.context['data']``. Unless ``self.force_check``
     is set, they are also stored in ``self.context["types"]``.

     Args:
         text: The user's response, passed to ``util.verify_response`` when
             ``self.force_check`` is falsy.

     Returns:
         ``None`` when neither ``self.force_check`` nor the verified
         response is truthy. Otherwise an ``sm.DoAll`` of a selectable
         table of the types followed by ``ChangeIndex()``, chained to this
         state's when-done state.
     """
     first_line = self.context['data'][0]
     types = rows_and_types(split_line(first_line))
     if not self.force_check:
         self.context["types"] = types

     # Guard clause: nothing to show unless forced or the user confirmed.
     if not (self.force_check or util.verify_response(text)):
         return None

     print(types)
     dummy_frame = iris_objects.IrisDataframe(
         column_names=self.context['headers'],
         column_types=["String" for _ in types],
         data=[types])
     selectable_table = {
         "type": "collection_select_one",
         "value": dummy_frame.generate_spreadsheet_data(),
     }
     print_types = sm.Print([selectable_table])
     steps = sm.DoAll([print_types, ChangeIndex()])
     return steps.when_done(self.get_when_done_state())

Example #6

0

Show file

 def get_output(self):
     """Show the headers inferred from the file and ask for confirmation.

     The header line is the first line of the loaded file, or the second
     line when the "throw_away" variable is truthy. Header names are
     lower-cased, and up to three following lines are shown as sample rows.

     Returns:
         A two-item list: the question text and a "collection" payload
         holding the preview frame's spreadsheet data.
     """
     file_str = self.read_variable("loaded_file").content
     start_read = 1 if self.read_variable("throw_away") else 0
     lines = file_str.split("\n")

     headers = [name.lower() for name in split_line(lines[start_read])]
     sample_lines = lines[start_read + 1:start_read + 4]
     data_sample = [list(split_line(row)) for row in sample_lines]

     # The original code calls this helper and never uses its result; the
     # call is kept so behavior stays identical.
     _ = util.prettify_data(headers)

     # The header names double as the column types for this preview frame.
     dummy_frame = iris_objects.IrisDataframe(column_names=headers,
                                              column_types=headers,
                                              data=data_sample)
     question = "Here are the headers I inferred from the first line. Do these look good?"
     payload = {
         "type": "collection",
         "value": dummy_frame.generate_spreadsheet_data(),
     }
     return [question, payload]

Example #7

0

Show file

 def next_state_base(self, text):
     """Generate placeholder headers for the loaded file and preview rows.

     Stores "column0", "column1", ... (one per field of the file's first
     line) in ``self.context['headers']``. Stores the file's lines in
     ``self.context['data']``, skipping the first line when the
     "throw_away" variable is truthy.

     Args:
         text: Not used by this method.

     Returns:
         An ``sm.Print`` of a "collection" payload holding the preview
         frame's spreadsheet data, chained to this state's when-done state.
     """
     file_str = self.read_variable("loaded_file").content
     lines = file_str.split("\n")

     column_count = len(split_line(lines[0]))
     headers = ["column{}".format(index) for index in range(column_count)]
     self.context['headers'] = headers

     start_from = 1 if self.read_variable("throw_away") else 0
     self.context['data'] = lines[start_from:]

     # The original code calls this helper and never uses its result; the
     # call is kept so behavior stays identical.
     _ = util.prettify_data(headers)

     # NOTE(review): 'data' already starts at `start_from`, yet the preview
     # slice is offset by `start_from + 1` again, so the first stored row(s)
     # never appear in the preview. Confirm whether that is intended.
     preview_lines = self.context['data'][start_from + 1:start_from + 4]
     data_sample = [list(split_line(row)) for row in preview_lines]

     # The header names double as the column types for this preview frame.
     dummy_frame = iris_objects.IrisDataframe(column_names=headers,
                                              column_types=headers,
                                              data=data_sample)
     payload = {
         "type": "collection",
         "value": dummy_frame.generate_spreadsheet_data(),
     }
     printer = sm.Print([payload])
     return printer.when_done(self.get_when_done_state())

Example #8

0

Show file

File: EmpathAnalysis.py Project: tchen0123/iris-agent

 def command(self, documents):
     """Run Empath over the documents and tabulate the category scores.

     Args:
         documents: Object whose ``to_matrix().flatten()`` gives the
             documents to analyze.

     Returns:
         An ``IrisDataframe`` with columns "category" and
         "normalized_count", one row per item of the analysis result,
         sorted by score in descending order.
     """
     documents = documents.to_matrix().flatten()
     import numpy as np
     from empath import Empath
     lexicon = Empath()
     scores = lexicon.analyze(documents.tolist(), normalize=True)
     ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
     rows = [[category, score] for category, score in ranked]
     return iris_objects.IrisDataframe(
         column_names=["category", "normalized_count"],
         column_types=["String", "Number"],
         data=rows)

Example #9

0

Show file

 def test_dataframe(self):
     """Smoke-test IrisDataframe construction and its add_* helpers.

     Builds a frame from two records, derives frames with one and two
     extra columns, generates spreadsheet data, adds a row, and prints the
     results. Nothing is asserted.
     """
     records = [
         {"firstname": "Ethan", "lastname": "Fast"},
         {"firstname": "Binbin", "lastname": "Chen"},
     ]
     people = iris_objects.IrisDataframe(records)
     with_age = people.add_column("age", [27, 27])
     with_age_and_job = people.add_columns(
         ["age", "occupation"],
         [[27, 27], ["CS PhD Student", "Bioinformatics PhD Student"]])
     # Called only to check that it runs; the result is not inspected.
     with_age_and_job.generate_spreadsheet_data()
     people.add_rows([""])
     print(people)
     print(with_age.df["age"])
     print(with_age_and_job.df["occupation"])
Example #10

0

Show file

    def command(self, file, bool_image, bool_pubmed, bool_other_disease,
                bool_display):
        """Run a drug-disease query for every row of a tab-separated file.

        The file is read with pandas. Its first column is taken as the drug
        and its second column as the disease. The raw table is stored in the
        Iris environment as 'drug_disease_list'.

        Args:
            file: Object whose ``path`` attribute points at the TSV file.
            bool_image: Passed to ``Options`` as ``gen_image``.
            bool_pubmed: Passed to ``Options`` as ``gen_pubmed``.
            bool_other_disease: When truthy, each answer also gets an
                "other_disease" entry from
                ``run_main.find_drug_indications``.
            bool_display: When truthy the collected answers are returned,
                otherwise only a message naming the output directory.

        Returns:
            Either a list whose first element is the task directory followed
            by one answer dict per row, or a "written results to: ..."
            string.
        """
        import pandas as pd
        panda_df = pd.read_csv(file.path, sep='\t')
        iris_df = iris_objects.IrisDataframe(data=panda_df)
        self.iris.add_to_env('drug_disease_list', iris_df)

        # Each run writes into its own timestamped directory.
        task_dir = os.path.join(
            results_dir,
            datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
        options = Options(gen_image=bool_image,
                          gen_pubmed=bool_pubmed,
                          outPath=task_dir)

        # `DataFrame.ix` was removed in pandas 1.0; `.iloc` selects the
        # first two columns purely by position.
        drug_list = list(panda_df.iloc[:, 0])
        disease_list = list(panda_df.iloc[:, 1])

        # The task directory goes first so the consumer can find the output.
        answer_arr = [task_dir]
        for drug, disease in zip(drug_list, disease_list):
            answer = Q2_query(drug, disease, options)
            if isinstance(answer, str):
                # A plain string from the query is an error message.
                answer = {'error': answer}

            if bool_other_disease:
                answer["other_disease"] = run_main.find_drug_indications(drug)

            answer['drug'] = drug.strip()
            answer['disease'] = disease.strip()
            answer_arr.append(answer)

        if bool_display:
            return answer_arr
        return ' '.join(("written results to:", task_dir))

Example #11

0

Show file

    def explanation(self, result):
        """Build the display list for a similarity query.

        Args:
            result: A two-item sequence ``(condition, results)``. ``results``
                exposes an ``error`` attribute and a ``top_similarities()``
                method.

        Returns:
            A one-item list holding an error message, a "no similarities"
            message, or an ``IrisDataframe`` of the top similarities. The
            dataframe is also stored in the Iris environment under a name
            derived from the condition.
        """
        condition, results = result

        # Environment name: prefix plus the first five characters of the
        # condition, with spaces removed and lower-cased.
        df_name = ('similarities_' + condition[:5]).replace(" ", "").lower()

        if results.error is not None:
            return [
                'There was an error processing your request for %s' %
                condition
            ]

        similarities = results.top_similarities()
        if similarities is None:
            return ['No similarities could be computed']

        similarities_df = iris_objects.IrisDataframe(data=similarities)
        self.iris.add_to_env(df_name, similarities_df)
        return [similarities_df]

Example #12

0

Show file

    def explanation(self, result):
        """Build the display list for a genetic-disease similarity query.

        Args:
            result: A two-item sequence ``[results, condition]``. ``results``
                exposes an ``error`` attribute and a ``top_similarities()``
                method.

        Returns:
            A list holding an error message, a "no similarities" message, or
            an introductory sentence followed by an ``IrisDataframe`` of the
            top similarities. The dataframe is also stored in the Iris
            environment under a name derived from the condition.
        """
        results, condition = result

        # Environment name: prefix plus the first five characters of the
        # condition, with spaces removed and lower-cased.
        df_name = ('similarities_' + condition[:5]).replace(" ", "").lower()

        if results.error is not None:
            return ['There was an error processing your request']

        similarities = results.top_similarities()
        if similarities is None:
            return ['No similarities could be computed']

        similarities_df = iris_objects.IrisDataframe(data=similarities)
        self.iris.add_to_env(df_name, similarities_df)
        return [
            'The following genetic diseases are the most semantically similar to your query',
            similarities_df,
        ]

Example #13

0

Show file

    def explanation(self, result):
        """Turn one drug-disease query result into displayable output.

        Full tables are stored in the Iris environment under
        ``<table>_<suffix>`` names, while shortened versions are placed in
        the returned list next to explanatory sentences.

        Args:
            result: Dictionary describing the query outcome. An "error" key
                short-circuits everything. Otherwise the keys "drug",
                "disease", "drug_id", "disease_id", "drug_genes",
                "drug_genes_short", "disease_genes" and
                "disease_genes_short" are read, plus the optional keys
                "GOENRICH"/"GOENRICH_short",
                "tissue_df_dis"/"tissue_df_dis_short",
                "pubmed"/"pubmed_short", "other_disease" and "image_file".

        Returns:
            ``result['error']`` when that key is present, otherwise a list
            of strings and ``IrisDataframe`` objects to display in order.
        """
        if "error" in result:
            return result['error']

        drug = result['drug']
        disease = result['disease']

        # Suffix that ties every stored table to this drug/disease pair:
        # the first three characters of each name, spaces turned into
        # underscores, lower-cased.
        query_name = drug[:3] + "_" + disease[:3]
        query_name = "_" + query_name.replace(' ', '_').lower()
        print(query_name, "== query name")

        # Genes associated with the drug
        query_statement = ('How does ' + drug + '(' + result['drug_id'] +
                           ') treat ' + disease + '(' + result['disease_id'] +
                           ').')
        result_array = ['Here are your results for: %s' % query_statement]
        result_array.append(
            'Top genes found to be targetted by %s are below. Full dataset saved as drug_genes_{drug_disease}'
            % drug)
        drug_gene_term_object = iris_objects.IrisDataframe(
            data=result['drug_genes'])
        self.iris.add_to_env('drug_genes' + query_name, drug_gene_term_object)
        drug_gene_term_object_short = iris_objects.IrisDataframe(
            data=result['drug_genes_short'])
        result_array.append(drug_gene_term_object_short)

        # Genes associated with the disease
        result_array.append(
            'Top genes found to be associated with %s are below. Full dataset saved as disease_genes_{drug_disease}'
            % disease)
        disease_gene_term_object = iris_objects.IrisDataframe(
            data=result['disease_genes'])
        self.iris.add_to_env('disease_genes' + query_name,
                             disease_gene_term_object)
        disease_gene_term_object_short = iris_objects.IrisDataframe(
            data=result['disease_genes_short'])
        result_array.append(disease_gene_term_object_short)

        # Significant GO terms. Both tables are built before anything is
        # announced, so a failure yields only the "none found" message
        # instead of a header followed by a contradiction.
        try:
            go_term_object = iris_objects.IrisDataframe(
                data=result['GOENRICH'])
            go_term_object_short = iris_objects.IrisDataframe(
                data=result['GOENRICH_short'])
        except Exception:
            # Best effort: missing or unusable enrichment data is reported
            # to the user rather than aborting the whole explanation.
            result_array.append('No significant GO terms found')
        else:
            result_array.append(
                'Top significant GO terms associated with the drug-disease interaction are shown. Full dataset saved as go_terms_{drug_disease}'
            )
            self.iris.add_to_env('go_terms' + query_name, go_term_object)
            result_array.append(go_term_object_short)

        # Tissues in which disease genes are differentially expressed
        if 'tissue_df_dis' in result:
            result_array.append(
                'The most relevant tissues, in which disease genes are differentially expressed, are shown. Full dataset saved as tissues_{drug_disease} '
            )
            tissue_object_dis = iris_objects.IrisDataframe(
                data=result['tissue_df_dis'])
            tissue_object_dis_short = iris_objects.IrisDataframe(
                data=result['tissue_df_dis_short'])
            self.iris.add_to_env('tissues_disease' + query_name,
                                 tissue_object_dis)
            result_array.append(tissue_object_dis_short)
        else:
            result_array.append(
                'No differential tissue expression in disease state detected.')

        # Supporting literature; a plain string here is shown verbatim.
        if "pubmed" in result:
            if isinstance(result["pubmed"], str):
                result_array.append(result["pubmed"])
            else:
                result_array.append(
                    "Following are PMIDs that support the interaction: Full dataset saved as pmid_{drug_disease}."
                )
                pmid_df_short = iris_objects.IrisDataframe(
                    data=result["pubmed_short"])
                pmid_df = iris_objects.IrisDataframe(data=result["pubmed"])
                self.iris.add_to_env('pmid' + query_name, pmid_df)
                result_array.append(pmid_df_short)

        # Other diseases potentially impacted by the drug
        if "other_disease" in result:
            ph_genes_str, _ = result["other_disease"]
            # The tab-separated string is a flat sequence of 4-field
            # records. NOTE(review): the original comment gives the field
            # order as "prb, BH, ph, sig_genes", which does not match the
            # column names below -- confirm against the producer.
            ph_genes_arr = ph_genes_str.split('\t')
            if len(ph_genes_arr) >= 4:
                result_array.extend([
                    'Top hits of diseases potentially impacted by %s. Full dataset saved as drug_indications_{drug_disease}.'
                    % drug,
                    'We queried the gene neighborhood of drug targets and found the following phenotypes to be significant. Here we list significant phenotypes in order of probability. Column headings are phenotype, probability, significance level cutoff, and a list of genes that support the relationship'
                ])

                ph_genes_array_all = [
                    ph_genes_arr[x:x + 4]
                    for x in range(0, len(ph_genes_arr), 4)
                ]
                ph_genes_array_all_iris = iris_objects.IrisDataframe(
                    column_names=[
                        "Phenotype", "probability",
                        "Benjamin Hochberg significance cutoff",
                        "list of genes"
                    ],
                    column_types=["Text", "Text", "Text", "Text"],
                    data=ph_genes_array_all)
                self.iris.add_to_env('drug_indications' + query_name,
                                     ph_genes_array_all_iris)

                # Only the first five records are displayed inline.
                ph_genes_array_short = ph_genes_array_all[:5]
                ph_genes_array_short_iris = iris_objects.IrisDataframe(
                    column_names=[
                        "Phenotype", "Probability",
                        "Benjamin Hochberg significance cutoff",
                        "list of genes"
                    ],
                    column_types=["Text", "Text", "Text", "Text"],
                    data=ph_genes_array_short)
                result_array.append(ph_genes_array_short_iris)
            else:
                result_array.append('No other drug indications found')

        # Display the diagram
        if "image_file" in result:
            result_array.append('Diagram stored in: %s' % result["image_file"])
            # Pass the path as an argument list, not through a shell string,
            # so paths with spaces or shell metacharacters open correctly.
            import subprocess
            try:
                subprocess.call(["open", result["image_file"]])
            except OSError:
                # Opening the viewer is best effort (`open` may not exist on
                # this platform); the path is already reported above.
                pass

        result_array.append(
            "Full dataframes are available for viewing using the command: print {dataframe_name}. See right side panel for more information."
        )
        result_array.append(
            "The suffix for the drug-disease interaction pair is: %s" %
            query_name)
        result_array.append("Results are also stored in: %s" % results_dir)

        return result_array

Example #14

0

Show file

    def explanation(self, results):
        """Summarize a batch of drug-disease queries.

        Every successful query's tables are stored in the Iris environment
        under ``<table>_<suffix>`` names. The returned summary table lists,
        per query, its status, suffix and stored variable names.

        Args:
            results: Either a string, which is returned unchanged, or a list
                whose first element is the results directory and whose
                remaining elements are per-query result dictionaries. The
                list is not modified.

        Returns:
            The input string, or a list of explanatory sentences followed by
            an ``IrisDataframe`` with the columns "Drug", "Disease",
            "Query Status", "Suffix" and "Associated Variables".
        """
        if isinstance(results, str):
            return results

        # Read rather than pop the leading directory entry so the caller's
        # list is left intact and a repeated call behaves the same.
        task_dir = results[0]
        query_results = results[1:]

        explanation_array = [
            'Result tables for each query are stored in the right side bar as variables. You can view a table using the command: print {dataframe_name}_{suffix}.',
            'Diagrams (if requested) and other results can be found in the results directory: %s'
            % task_dir,
            'Suffix and variable information is displayed below',
        ]

        # One entry per drug-disease pair, kept as parallel columns.
        drug_arr = []
        disease_arr = []
        worked_arr = []
        suffix_arr = []
        assoc_variables = []

        for result in query_results:
            print('results', result['drug'], result['disease'])
            drug_arr.append(result['drug'])
            disease_arr.append(result['disease'])

            if 'error' in result:
                worked_arr.append(result['error'])
                suffix_arr.append('')
                assoc_variables.append('')
                continue

            worked_arr.append('SUCCESS')

            # Suffix: first three characters of each name, spaces removed,
            # lower-cased.
            query_name = result['drug'][:3] + "_" + result['disease'][:3]
            query_name = "_" + query_name.replace(' ', '').lower()
            suffix_arr.append(query_name)

            # Genes associated with the drug
            drug_gene_term_object = iris_objects.IrisDataframe(
                data=result['drug_genes'])
            self.iris.add_to_env('drug_genes' + query_name,
                                 drug_gene_term_object)

            # Genes associated with the disease
            disease_gene_term_object = iris_objects.IrisDataframe(
                data=result['disease_genes'])
            self.iris.add_to_env('disease_genes' + query_name,
                                 disease_gene_term_object)

            variable_info = [
                'drug_genes' + query_name,
                'disease_genes' + query_name,
            ]

            # Significant GO terms. Missing or unusable enrichment data must
            # not abort the summary for the whole batch, so the variable is
            # only listed when it was actually stored.
            try:
                go_term_object = iris_objects.IrisDataframe(
                    data=result['GOENRICH'])
            except Exception:
                go_term_object = None
            if go_term_object is not None:
                self.iris.add_to_env('go_terms' + query_name, go_term_object)
                variable_info.append('go_terms' + query_name)

            # Tissues in which disease genes are differentially expressed
            if 'tissue_df_dis' in result:
                variable_info.append('tissues_disease' + query_name)
                tissue_object_dis = iris_objects.IrisDataframe(
                    data=result['tissue_df_dis'])
                self.iris.add_to_env('tissues_disease' + query_name,
                                     tissue_object_dis)

            # Supporting literature; a plain string means no table exists.
            if "pubmed" in result and not isinstance(result["pubmed"], str):
                variable_info.append('pmid' + query_name)
                pmid_df = iris_objects.IrisDataframe(data=result["pubmed"])
                self.iris.add_to_env('pmid' + query_name, pmid_df)

            # Other diseases potentially impacted by the drug
            if "other_disease" in result:
                ph_genes_str, _ = result["other_disease"]
                # Flat tab-separated sequence of 4-field records.
                ph_genes_arr = ph_genes_str.split('\t')
                if len(ph_genes_arr) >= 4:
                    ph_genes_array_all = [
                        ph_genes_arr[x:x + 4]
                        for x in range(0, len(ph_genes_arr), 4)
                    ]
                    ph_genes_array_all_iris = iris_objects.IrisDataframe(
                        column_names=[
                            "Phenotype", "probability",
                            "Benjamin Hochberg significance cutoff",
                            "list of genes"
                        ],
                        column_types=["Text", "Text", "Text", "Text"],
                        data=ph_genes_array_all)
                    self.iris.add_to_env('drug_indications' + query_name,
                                         ph_genes_array_all_iris)
                    variable_info.append('drug_indications' + query_name)

            assoc_variables.append(', '.join(variable_info))

        # Save the per-query summary as an Iris dataframe.
        info_data = [
            list(row) for row in zip(drug_arr, disease_arr, worked_arr,
                                     suffix_arr, assoc_variables)
        ]
        info_df = iris_objects.IrisDataframe(
            column_names=[
                "Drug", "Disease", "Query Status", "Suffix",
                "Associated Variables"
            ],
            column_types=["Text", "Text", "Text", "Text", "Text"],
            data=info_data)
        explanation_array.append(info_df)

        return explanation_array

Example #15

0

Show file

 def command(self, array):
     """Return a one-row IrisDataframe of skewness taken along axis 0.

     NaN values are omitted from the computation (``nan_policy='omit'``).

     Args:
         array: Object exposing ``column_names``, ``column_types`` and
             ``to_matrix()``.

     Returns:
         An ``IrisDataframe`` that reuses the input's column names and
         types and whose single row is the ``scipy.stats.skew`` result.
     """
     from scipy.stats import skew
     skew_row = skew(array.to_matrix(), axis=0, nan_policy='omit')
     return iris_objects.IrisDataframe(column_names=array.column_names,
                                       column_types=array.column_types,
                                       data=[skew_row])

Example #16

0

Show file

    def explanation(self, results):
        """Present the results as a caption followed by an IrisDataframe.

        Args:
            results: Data passed straight to ``IrisDataframe(data=...)``.

        Returns:
            A two-item list: the caption string and the dataframe.
        """
        caption = "These are the enriched molecular function GO terms with corrected p-values"
        enrichment_table = iris_objects.IrisDataframe(data=results)
        return [caption, enrichment_table]

Example #17

0

Show file

File: Min.py Project: tchen0123/iris-agent

 def command(self, array):
     """Return a one-row IrisDataframe of minima taken along axis 0.

     Args:
         array: Object exposing ``column_names``, ``column_types`` and
             ``to_matrix()``.

     Returns:
         An ``IrisDataframe`` that reuses the input's column names and
         types and whose single row is ``to_matrix().min(axis=0)``.
     """
     import numpy as np
     minimum_row = array.to_matrix().min(axis=0)
     return iris_objects.IrisDataframe(
         column_names=array.column_names,
         column_types=array.column_types,
         data=[minimum_row])