Python extract_cuffdiff_dataの例

プログラミング言語: Python

名前空間/パッケージ名: script_util2

メソッド/関数: extract_cuffdiff_data

hotexamples.comのコード掲載数: 12

Python extract_cuffdiff_data - 12件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのscript_util2.extract_cuffdiff_dataの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: kb_cummerbundImpl.py プロジェクト: srividya22/kb_cummerbund

    def generate_cummerbund_plots(self, ctx, cummerbundParams):
        """Run the cummeRbund R plot scripts for a cuffdiff object and save the plot set.

        Fetches the workspace object named ``cummerbundParams['ws_cuffdiff_id']``
        from ``cummerbundParams['workspace_name']``, extracts the cuffdiff data
        through ``script_util2.extract_cuffdiff_data``, runs every R script in the
        plot list through ``script_util2.rplotandupload`` and saves the collected
        plot set as a ``KBaseRNASeq.cummerbund_output`` object named
        ``cummerbundParams['ws_cummerbund_output']``.

        :param ctx: the context object; ``ctx['token']`` is used as the user token.
        :param cummerbundParams: mapping with the keys ``workspace_name``,
            ``ws_cuffdiff_id`` and ``ws_cummerbund_output``.
        :returns: ``[returnVal]`` where ``returnVal`` is
            ``cummerbundParams['ws_cummerbund_output']``. On the early-exit paths
            (no workspace objects, failed extraction) the bare ``returnVal`` is
            returned, exactly as before.
        :raises ValueError: if ``returnVal`` is not a ``basestring``.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN generate_cummerbund_plots

        params    = cummerbundParams
        returnVal = params['ws_cummerbund_output']

        #Set up workspace client
        user_token = ctx['token']
        ws_client  = Workspace(url=self.__WS_URL, token=user_token)

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : params['ws_cuffdiff_id'],
            'workspace' : params['workspace_name']
            }])

        # Check if workspace has data
        # NOTE(review): the early exits return the bare value while the normal
        # path returns a one-element list - confirm which shape callers expect.
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = script_util2.extract_cuffdiff_data (self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)

        # The failure check must come BEFORE the log line below: concatenating
        # False to a str raises TypeError, which used to mask the clean exit.
        if (cuffdiff_dir is False):
            self.__LOGGER.info("Extraction of cuffdiff data failed")
            return returnVal

        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Run R script to run cummerbund json and update the cummerbund output json file
        # Prepare output object.
        outputobject=dict()

        # Prepare output plot list
        cummerbundplotset=[]

        # List of plots to generate
        plotlist = [
                { 'file': "dispersionplot.R",
                  'title': "Dispersion plot",
                  'description': "Dispersion plot is the quality measure of the data. It estimates deviation from threshold against counts in FPKM." },


                { 'file': "fpkmscvplot.R",
                  'title': "Genes CV plot",
                  'description': "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data." },

                { 'file': "isoformscvplot.R",
                  'title': "Isoform CV plot",
                  'description': "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data.Differences in CV2 can result in lower numbers of differentially expressed isoforms due to a higher degree of variability between replicate fpkm estimates." },

                { 'file': "densityplot.R",
                  'title': "Density plot",
                  'description': "The density plot shows the distribution of FPKM scores across samples" },

                { 'file': "csdensityrepplot.R",
                  'title': "Replicates density plot",
                  'description': "The replicates density plot shows the distribution of FPKM scores across sample replicates" },

                { 'file': "boxplot.R",
                  'title': "Box plots",
                  'description': "The box plots show the FPKM distribution across samples." },

                { 'file': "boxrepplot.R",
                  'title': "Box plots of replicates",
                  'description': "The box plots of replicates show the FPKM distribution across sample replicates." },

                { 'file': "pairwisescatterplots.R",
                  'title': "Pairwise scatter plots",
                  'description': "The scatterplots show differences in gene expression between two samples. If two samples are identical, all genes will fall on the mid-line." },

                 { 'file': "volcanomatrixplot.R",
                  'title': "Volcano matrix plots",
                  'description': "Volcano matrix plot is a scatter plot that also identifies differentially expressed genes (by color) between samples based on log2 fold change cut off." },

                { 'file': "pcaplot.R",
                  'title': "PCA plot",
                  'description': "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions." },

                { 'file': "pcarepplot.R",
                  'title': "PCA plot including replicates",
                  'description': "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions including replicates." },

                { 'file': "mdsplot.R",
                  'title': "Multi-dimensional scaling plot",
                  'description': "Multi-dimensional scaling plots are similar to PCA plots and useful for determining the major sources of variation in the dataset. " },

                { 'file': "mdsrepplot.R",
                  'title': "Multi-dimensional scaling plot including replicates",
                  'description': "Multi-dimensional scaling plot including replicates are  similar to PCA plots and useful for determining the major sources of variation in the dataset with replicates. These can be useful to determine any systematic bias that may be present between conditions." }
            ]

        # TODO: the dendrogram plots are disabled because they give Rplot.pdf
        #                { 'file': "dendrogramplot.R",
        #                  'title': "Dendrogram",
        #                  'description': "Dendrogram  based on the JS (Jensen-Shannon divergence) distance" },
        #
        #                { 'file': "dendrogramrepplot.R",
        #                  'title': "Dendrogram including replicates",
        #                  'description': "Dendrogram including replicates based on the JS (Jensen-Shannon divergence) distance" },


        # Iterate through the plotlist and generate the images and json files.
        # cummerbundplotset is handed to the helper on every call; presumably the
        # helper appends each generated plot to it - verify in script_util2.
        for plot in plotlist:
            status = script_util2.rplotandupload(self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS,
                plot['file'], self.__SHOCK_URL, self.__HS_URL, user_token,
                cummerbundplotset, plot['title'], plot['description'], cuffdiff_dir)
            # A failed plot is only logged; the remaining plots are still attempted.
            if status == False:
                self.__LOGGER.info("Problem generating image and json file - " + plot["file"])


        # Populate the output object
        outputobject['cummerbundplotSet'] = cummerbundplotset

        #TODO: Need to figure out how to get rnaseq experiment id
        outputobject['rnaseq_experiment_id'] = "rnaseq_experiment_id"
        outputobject['cuffdiff_input_id'] = params['ws_cuffdiff_id']

        # The response of save_objects is not used, so it is not bound to a name.
        ws_client.save_objects({
            "workspace":params['workspace_name'],
            "objects": [{
                "type":"KBaseRNASeq.cummerbund_output",
                "data":outputobject,
                "name":params["ws_cummerbund_output"]}]
            })

        #END generate_cummerbund_plots

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method generate_cummerbund_plots return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

コード例 #2

ファイルを表示

ファイル: kb_cummerbundImpl.py プロジェクト: srividya22/kb_cummerbund

    def create_expression_matrix(self, ctx, expressionMatrixParams):
        """Build an expression matrix from a cuffdiff object and save it to the workspace.

        Fetches the workspace object named
        ``expressionMatrixParams['ws_cuffdiff_id']``, extracts the cuffdiff data
        through ``script_util2.extract_cuffdiff_data``, runs either
        ``fpkmgenematrix.R`` (when ``include_replicates`` equals 0) or
        ``repfpkmgenematrix.R`` (otherwise) through
        ``script_util2.generate_and_upload_expression_matrix``, loads the JSON
        file that call names, tags it with ``type = 'untransformed'`` and saves it
        as a ``KBaseFeatureValues.ExpressionMatrix`` object.

        :param ctx: the context object; ``ctx['token']`` is used as the user token.
        :param expressionMatrixParams: mapping with the keys ``workspace_name``,
            ``ws_cuffdiff_id``, ``ws_expression_matrix_id`` and
            ``include_replicates``.
        :returns: ``[returnVal]`` where ``returnVal`` is
            ``expressionMatrixParams['ws_expression_matrix_id']``. On the
            early-exit paths (no workspace objects, failed extraction, failed
            matrix creation) the bare ``returnVal`` is returned, exactly as before.
        :raises ValueError: if ``returnVal`` is not a ``basestring``.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_expression_matrix


        params    = expressionMatrixParams
        returnVal = params['ws_expression_matrix_id']
        #Set up workspace client
        user_token = ctx['token']
        workspace = params['workspace_name']
        ws_client  = Workspace(url=self.__WS_URL, token=user_token)

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : params['ws_cuffdiff_id'],
            'workspace' : params['workspace_name']
            }])

        # Check if workspace has data
        # NOTE(review): the early exits return the bare value while the normal
        # path returns a one-element list - confirm which shape callers expect.
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = script_util2.extract_cuffdiff_data (self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)

        # The failure check must come BEFORE the log line below: concatenating
        # False to a str raises TypeError, which used to mask the clean exit.
        if (cuffdiff_dir is False):
            self.__LOGGER.info("Extraction of cuffdiff data failed")
            return returnVal

        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Only the R script differs between the two modes, so pick the script
        # first and issue a single call.
        if params['include_replicates'] == 0:
            scriptfile = "fpkmgenematrix.R"
        else:
            scriptfile = "repfpkmgenematrix.R"

        outjson = script_util2.generate_and_upload_expression_matrix(self.__LOGGER, self.__SCRATCH,
                    self.__RSCRIPTS, scriptfile, self.__SHOCK_URL, self.__HS_URL, user_token,
                    cuffdiff_dir, self.__WS_URL,workspace)

        if outjson is False:
            self.__LOGGER.info("Creation of expression matrix failed")
            return returnVal

        # outjson is a file name relative to the scratch directory.
        with open("{0}/{1}".format(self.__SCRATCH , outjson),'r') as et:
            eo = json.load(et)
        eo['type']='untransformed'
        # NOTE: the genome reference (s_res[0]['data']['genome_id']) is not
        # attached to the matrix; the assignment was already disabled.
        #eo['genome_ref'] = genome_ref

        self.__LOGGER.info(workspace + self.__SCRATCH + outjson + params['ws_expression_matrix_id'])
        ws_client.save_objects({'workspace' : workspace,
            'objects' : [{ 'type' : 'KBaseFeatureValues.ExpressionMatrix',
                           'data' : eo,
                           'name' : params['ws_expression_matrix_id']
                        }]})


        #END create_expression_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method create_expression_matrix return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

コード例 #3

ファイルを表示

ファイル: kb_cummerbundImpl.py プロジェクト: srividya22/kb_cummerbund

    def create_interactive_heatmap_de_genes(self, ctx, interactiveHeatmapParams):
        """Filter a cuffdiff result, plot an interactive heatmap and save its expression matrix.

        Fetches the workspace object named
        ``interactiveHeatmapParams['ws_cuffdiff_id']``, extracts the cuffdiff
        data, filters ``gene_exp.diff`` through
        ``script_util2.filter_expression_matrix`` and
        ``script_util2.get_gene_list_from_filter_step``, builds the R command
        line with ``script_util2.get_command_line_heatmap_basic`` and runs it
        through ``script_util2.rplotanduploadinteractive``. When that call does
        not report failure, the JSON file it names is loaded, tagged with
        ``type = 'untransformed'`` and saved as a
        ``KBaseFeatureValues.ExpressionMatrix`` object.

        Note that ``interactiveHeatmapParams`` is modified in place: the keys
        ``cuffdiff_dir``, ``infile``, ``outfile``, ``cummerbundplotset``,
        ``title`` and ``description`` are written to it.

        :param ctx: the context object; ``ctx['token']`` is used as the user token.
        :param interactiveHeatmapParams: mapping with at least the keys
            ``workspace_name``, ``ws_cuffdiff_id`` and ``ws_expression_matrix_id``;
            it is also passed as-is to the ``script_util2`` helpers.
        :returns: ``[returnVal]`` where ``returnVal`` is
            ``interactiveHeatmapParams['ws_expression_matrix_id']``. On the
            early-exit paths (no workspace objects, failed extraction) the bare
            ``returnVal`` is returned.
        :raises ValueError: if ``returnVal`` is not a ``basestring``.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_interactive_heatmap_de_genes
        fparams    = interactiveHeatmapParams
        # Bind the return value up front: the early exit below used to reference
        # returnVal before any assignment, which raised UnboundLocalError.
        returnVal = fparams['ws_expression_matrix_id']
        #Set up workspace client
        user_token = ctx['token']
        workspace = fparams['workspace_name']
        ws_client  = Workspace(url=self.__WS_URL, token=user_token)
        system_params = {}
        system_params['token'] = user_token
        system_params['ws_url'] =  self.__WS_URL
        system_params['logger'] =  self.__LOGGER
        system_params['shock_url'] =  self.__SHOCK_URL
        system_params['hs_url'] =  self.__HS_URL
        system_params['scratch'] =  self.__SCRATCH
        system_params['rscripts'] =  self.__RSCRIPTS
        system_params['workspace'] = workspace

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : fparams['ws_cuffdiff_id'],
            'workspace' : fparams['workspace_name']
            }])

        #Check if workspace has data
        # NOTE(review): the early exits return the bare value while the normal
        # path returns a one-element list - confirm which shape callers expect.
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = script_util2.extract_cuffdiff_data (self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)

        # Sibling methods treat False as the failure signal of
        # extract_cuffdiff_data; without this guard the log line and the join()
        # calls below would raise TypeError on a failed extraction.
        if (cuffdiff_dir is False):
            self.__LOGGER.info("Extraction of cuffdiff data failed")
            return returnVal

        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Step 1: filter gene_exp.diff into <scratch>/gene_exp.diff.filter
        fparams['cuffdiff_dir'] = cuffdiff_dir
        fparams['infile'] = join (cuffdiff_dir, "gene_exp.diff")
        fparams['outfile'] = join(system_params['scratch'],  "gene_exp.diff.filter")

        filtered_matrix = script_util2.filter_expression_matrix(fparams, system_params)
        self.__LOGGER.info("matrix is " + filtered_matrix)

        # Step 2: derive the gene list from the filtered file. The return value
        # is not used here; the call is kept as-is (presumably for the file it
        # writes to fparams['outfile'] - verify in script_util2).
        fparams['infile'] = join (system_params['scratch'], "gene_exp.diff.filter")
        fparams['outfile'] = join(system_params['scratch'],  "gene_exp.diff.filter.genelist")

        script_util2.get_gene_list_from_filter_step(fparams)

        # Step 3: parameters for the heatmap R script
        rparams = {}
        rparams['genelist'] = filtered_matrix
        rparams['cuffdiff_dir'] = fparams['cuffdiff_dir']
        rparams['outpng'] = join (system_params['scratch'], "heatmap.png")
        rparams['imageheight'] = 1600
        rparams['imagewidth'] = 800
        rparams['plotscript'] = join(system_params['rscripts'], "heatmapplotinteractive.R")
        rparams['include_replicates'] = 1
        rparams['outmatrix'] = join (system_params['scratch'], "outmatrix")

        roptstr_basic_heatmap_rep = script_util2.get_command_line_heatmap_basic (rparams)

        # Prepare output plot list
        cummerbundplotset=[]

        # List of plots to generate
        plotlist = [

                { 'roptstr': roptstr_basic_heatmap_rep,
                  'title': "Heatmap",
                  'description': "Heatmap",
                  'exp' : fparams['ws_expression_matrix_id']
                  }

            ]
        fparams['cummerbundplotset'] = cummerbundplotset
        # Iterate through the plotlist and generate the images and json files.
        for plot in plotlist:
            fparams['title'] = plot['title']
            fparams['description'] = plot['description']


            status = script_util2.rplotanduploadinteractive(system_params,fparams, rparams, plot['roptstr'])
            if status == False:
                self.__LOGGER.info("Problem generating image and json file - " + plot["roptstr"])
            else:
                self.__LOGGER.info(status)

                # On success the helper's return value is used as the name of
                # a JSON file inside the scratch directory.
                outjson = status
                with open("{0}/{1}".format(self.__SCRATCH , outjson),'r') as et2:
                    eo2 = json.load(et2)
                eo2['type']='untransformed'
                # NOTE: the genome reference (s_res[0]['data']['genome_id']) is
                # not attached to the matrix; the assignment was already disabled.
                #eo2['genome_ref'] = genome_ref
                self.__LOGGER.info(workspace + self.__SCRATCH + outjson + plot['exp'])
                ws_client.save_objects({'workspace' : workspace,
                       'objects' : [{ 'type' : 'KBaseFeatureValues.ExpressionMatrix',
                       'data' : eo2,
                       'name' : plot['exp']
                 }]})

        #END create_interactive_heatmap_de_genes

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method create_interactive_heatmap_de_genes return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

コード例 #4

ファイルを表示

ファイル: kb_cummerbundImpl.py プロジェクト: dcchivian/kb_cummerbund

    def generate_cummerbund_plot2(self, ctx, cummerbundstatParams):
        """
        Generate the cummeRbund plot set and the differential expression
        statistics for a cuffdiff object, and save both to the workspace.

        The plot set is saved as a KBaseRNASeq.cummerbund_output object named
        cummerbundstatParams['ws_cummerbund_output']; the statistics parsed from
        gene_exp.diff are saved as a KBaseRNASeq.DifferentialExpressionStat
        object named cummerbundstatParams['ws_diffstat_output'].

        On the early-exit paths (no workspace objects, failed extraction) the
        bare value is returned instead of a one-element list, as before.

        :param cummerbundstatParams: instance of type "cummerbundstatParams"
           -> structure: parameter "workspace" of String, parameter
           "ws_cuffdiff_id" of type "ws_cuffdiff_id" (@id ws
           KBaseRNASeq.RNASeqCuffdiffdifferentialExpression), parameter
           "ws_cummerbund_output" of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output), parameter "ws_diffstat_output" of
           type "ws_diffstat_output" (Differential stat workspace id)
        :returns: instance of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output)
        :raises ValueError: if the return value is not a basestring.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN generate_cummerbund_plot2
        params    = cummerbundstatParams
        returnVal = params['ws_cummerbund_output']

        #Set up workspace client
        user_token = ctx['token']
        ws_client  = Workspace(url=self.__WS_URL, token=user_token)


        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : params['ws_cuffdiff_id'],
            'workspace' : params['workspace']
            }])

        # Check if workspace has data. This must run before s_res[0] is read
        # below; it used to come afterwards, so an empty result raised
        # IndexError instead of taking this exit.
        # NOTE(review): the early exits return the bare value while the normal
        # path returns a one-element list - confirm which shape callers expect.
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        # Single-argument parenthesised print behaves the same as the Python 2
        # print statement.
        print("Getting genome info")

        genome_ref = s_res[0]['data']['genome_id']
        print(genome_ref)
        gaapi = GenomeAnnotationAPI(self.callbackURL, token=user_token)
        genome = gaapi.get_genome_v1({"genomes": [{"ref": genome_ref}],
                                          "included_fields": ["scientific_name"],
                                          "included_feature_fields": ["id", "function", "type"
                                                                      ]})["genomes"][0]["data"]

        # Map feature id -> function; a missing or empty function becomes 'Unknown'.
        genome_dict = {}
        for feature in genome['features']:
            genome_dict[feature['id']] = feature.get('function') or 'Unknown'

        cuffdiff_dir = script_util2.extract_cuffdiff_data (self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)

        # The failure check must come BEFORE the log line below: concatenating
        # False to a str raises TypeError, which used to mask the clean exit.
        if (cuffdiff_dir is False):
            self.__LOGGER.info("Extraction of cuffdiff data failed")
            return returnVal

        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Run R script to run cummerbund json and update the cummerbund output json file
        # Prepare output object.
        outputobject=dict()

        # Prepare output plot list
        cummerbundplotset=[]
        # List of plots to generate
        plotlist = [
                { 'file': "dispersionplot.R",
                  'title': "Dispersion plot",
                  'description': "Dispersion plot is the quality measure of the data. It estimates deviation from threshold against counts in FPKM." },


                { 'file': "fpkmscvplot.R",
                  'title': "Genes CV plot",
                  'description': "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data." },

                { 'file': "isoformscvplot.R",
                  'title': "Isoform CV plot",
                  'description': "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data.Differences in CV2 can result in lower numbers of differentially expressed isoforms due to a higher degree of variability between replicate fpkm estimates." },

                { 'file': "densityplot.R",
                  'title': "Density plot",
                  'description': "The density plot shows the distribution of FPKM scores across samples" },

                { 'file': "csdensityrepplot.R",
                  'title': "Replicates density plot",
                  'description': "The replicates density plot shows the distribution of FPKM scores across sample replicates" },

                { 'file': "boxplot.R",
                  'title': "Box plots",
                  'description': "The box plots show the FPKM distribution across samples." },

                { 'file': "boxrepplot.R",
                  'title': "Box plots of replicates",
                  'description': "The box plots of replicates show the FPKM distribution across sample replicates." },

                { 'file': "pairwisescatterplots.R",
                  'title': "Pairwise scatter plots",
                  'description': "The scatterplots show differences in gene expression between two samples. If two samples are identical, all genes will fall on the mid-line." },

                 { 'file': "volcanomatrixplot.R",
                  'title': "Volcano matrix plots",
                  'description': "Volcano matrix plot is a scatter plot that also identifies differentially expressed genes (by color) between samples based on log2 fold change cut off." },

                { 'file': "pcaplot.R",
                  'title': "PCA plot",
                  'description': "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions." },

                { 'file': "pcarepplot.R",
                  'title': "PCA plot including replicates",
                  'description': "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions including replicates." },

                { 'file': "mdsplot.R",
                  'title': "Multi-dimensional scaling plot",
                  'description': "Multi-dimensional scaling plots are similar to PCA plots and useful for determining the major sources of variation in the dataset. " },

                { 'file': "mdsrepplot.R",
                  'title': "Multi-dimensional scaling plot including replicates",
                  'description': "Multi-dimensional scaling plot including replicates are  similar to PCA plots and useful for determining the major sources of variation in the dataset with replicates. These can be useful to determine any systematic bias that may be present between conditions." }
            ]


        # Iterate through the plotlist and generate the images and json files.
        # cummerbundplotset is handed to the helper on every call; presumably the
        # helper appends each generated plot to it - verify in script_util2.
        for plot in plotlist:
            status = script_util2.rplotandupload(self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS,
                plot['file'], self.__SHOCK_URL, self.__HS_URL, user_token,
                cummerbundplotset, plot['title'], plot['description'], cuffdiff_dir)
            # A failed plot is only logged; the remaining plots are still attempted.
            if status == False:
                self.__LOGGER.info("Problem generating image and json file - " + plot["file"])


        # Populate the output object
        outputobject['cummerbundplotSet'] = cummerbundplotset

        #TODO: Need to figure out how to get rnaseq experiment id
        outputobject['rnaseq_experiment_id'] = "rnaseq_experiment_id"
        outputobject['cuffdiff_input_id'] = params['ws_cuffdiff_id']

        # The responses of save_objects are not used, so they are not bound.
        ws_client.save_objects({
            "workspace":params['workspace'],
            "objects": [{
                "type":"KBaseRNASeq.cummerbund_output",
                "data":outputobject,
                "name":params["ws_cummerbund_output"]}]
            })

        # The helper is given gene_exp.diff, the output path and the
        # id -> function map; the JSON read back below comes from outfile.
        infile =  join(cuffdiff_dir, "gene_exp.diff")
        outfile = join(cuffdiff_dir, "gene_exp_diff.out")
        v.volcano_plot_data_parse_and_upload(infile,outfile, genome_dict)
        with open(outfile) as f:
            statdata = json.load(f)
        ws_client.save_objects({
            "workspace":params['workspace'],
            "objects": [{
                "type":"KBaseRNASeq.DifferentialExpressionStat",
                "data":statdata,
                "name":params["ws_diffstat_output"]}]
            })

        #END generate_cummerbund_plot2

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method generate_cummerbund_plot2 return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

コード例 #5

ファイルを表示

ファイル: kb_cummerbundImpl.py プロジェクト: dcchivian/kb_cummerbund

    def create_interactive_heatmap_de_genes_old(self, ctx, heatmapParams):
        """
        :param heatmapParams: instance of type "heatmapParams" -> structure:
           parameter "workspace" of String, parameter "sample1" of String,
           parameter "sample2" of String, parameter "q_value_cutoff" of
           Double, parameter "log2_fold_change_cutoff" of Double, parameter
           "num_genes" of Long, parameter "ws_cuffdiff_id" of type
           "ws_cuffdiff_id" (@id ws
           KBaseRNASeq.RNASeqCuffdiffdifferentialExpression), parameter
           "ws_expression_matrix_id" of type "ws_expression_matrix_id" (@id
           ws KBaseFeatureValues.ExpressionMatrix), parameter
           "ws_cummerbund_output" of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output)
        :returns: instance of type "ResultsToReport" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_interactive_heatmap_de_genes_old
        fparams    = heatmapParams
        #returnVal = "ttt"
        #Set up workspace client
        user_token = ctx['token']
        workspace = fparams['workspace']
        ws_client  = Workspace(url=self.__WS_URL, token=user_token)
        system_params = {}
        system_params['token'] = user_token
        system_params['ws_url'] =  self.__WS_URL
        system_params['logger'] =  self.__LOGGER
        system_params['shock_url'] =  self.__SHOCK_URL
        system_params['hs_url'] =  self.__HS_URL
        system_params['scratch'] =  self.__SCRATCH
        system_params['rscripts'] =  self.__RSCRIPTS
        system_params['workspace'] = workspace




        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name' : fparams['ws_cuffdiff_id'],
            'workspace' : fparams['workspace']
            }])

         #Check if workspace has data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal
        cuffdiff_dir = join (self.__SCRATCH , "cuffdiffData/cuffdiff")
        cuffdiff_dir = script_util2.extract_cuffdiff_data (self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)
        #cuffdiff_dir = "/kb/module/work/cuffdiffData/cuffdiff"
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)


        #if (cuffdiff_dir is False):
        #    return returnVal
        fparams['cuffdiff_dir'] = cuffdiff_dir
        fparams['infile'] = join (cuffdiff_dir, "gene_exp.diff")
        fparams['outfile'] = join(system_params['scratch'],  "gene_exp.diff.filter")


        fparams['pairs']=1
        fparams['logModetmp'] = 2



        rparams = {}
        
        rparams['cuffdiff_dir'] = fparams['cuffdiff_dir']
        rparams['outpng'] = join (system_params['scratch'], "heatmap.png")
        rparams['imageheight'] = 1600
        rparams['imagewidth'] = 800
        rparams['plotscript'] = join(system_params['rscripts'], "heatmapplotinteractive.R")
        rparams['include_replicates'] = 1
        rparams['pairs'] = fparams ['pairs']
        rparams['logMode'] = fparams['logModetmp']
        rparams['removezeroes'] = 1
        rparams['outmatrix'] = join (system_params['scratch'], "outmatrix")
        reportObj = {}

        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']
        # add additional info to provenance here, in this case the input data object reference
        provenance[0]['input_ws_objects']=[workspace+'/'+fparams['ws_cuffdiff_id']]
       
        report = ""
        if (fparams['pairs'] != 0):
        
           try:
                filtered_matrix = script_util2.filter_expression_matrix(fparams, system_params)
                self.__LOGGER.info("matrix is " + filtered_matrix)
                fparams['infile'] = join (system_params['scratch'], "gene_exp.diff.filter")
                fparams['outfile'] = join(system_params['scratch'],  "gene_exp.diff.filter.genelist")
                genelist_filtered_matrix_file = script_util2.get_gene_list_from_filter_step(fparams)
                rparams['genelist'] = filtered_matrix
           except:
                report += "There was an error in creating expression matrix"
                report += "No differentially expressed genes were found"
                report += "Please change / double check  your filtering criteria"

	        reportObj = {
		    'objects_created':[],
		    'text_message':report
		}

		reportName = 'create_interactive_heatmap_de_genes_old_'+str(hex(uuid.getnode()))
		report_info = ws_client.save_objects({
		    'workspace':fparams['workspace'],
		    'objects':[
			 {
			  'type':'KBaseReport.Report',
			  'data':reportObj,
			  'name':reportName,
			  'meta':{},
			  'hidden':1, # important!  make sure the report is hidden
			  'provenance':provenance
			 }
		    ] })[0]  
		print('saved Report: '+pformat(report_info))

		returnVal = { "report_name" : reportName,"report_ref" : str(report_info[6]) + '/' + str(report_info[0]) + '/' + str(report_info[4]) }

                return [returnVal]


        try:
	    # Prepare output object.
	    outjson = False;
     

	    roptstr_basic_heatmap_rep = script_util2.get_command_line_heatmap_basic (rparams)

	    # Run R script to run cummerbund json and update the cummerbund output json file
	    # Prepare output object.
	    outputobject=dict()




	    # Prepare output plot list
	    cummerbundplotset=[]

	    # List of plots to generate
	    plotlist = [
		      
		    { 'roptstr': roptstr_basic_heatmap_rep,
		      'title': "Heatmap",
		      'description': "Heatmap", 
		      'exp' : fparams['ws_expression_matrix_id']
		      }

		]
	    fparams['cummerbundplotset'] = cummerbundplotset
	    # Iterate through the plotlist and generate the images and json files.
	    for plot in plotlist:
		fparams['title'] = plot['title']
		fparams['description'] = plot['description']


		status = script_util2.rplotanduploadinteractive(system_params,fparams, rparams, plot['roptstr'])
		if status == False:
                    self.__LOGGER.info("Problem generating image and json file - " + plot["roptstr"])
                    report = "Error: Please select a different cutoff criteria. None of the genes passed fold change and q-value-cutoff. "
                    report += "Failed to create expression  matrix with differentially expressed genes(" +  fparams['ws_expression_matrix_id'] + "). No genes to show on heatmap."
                    reportObj = {
                    'objects_created':[],
                    'text_message':report
                    }
                    reportName = 'create_interactive_heatmap_de_genes_old_'+str(hex(uuid.getnode()))
                    report_info = ws_client.save_objects({
                        'workspace':fparams['workspace'],
                        'objects':[
                        {
                        'type':'KBaseReport.Report',
                        'data':reportObj,
                        'name':reportName,
                        'meta':{},
                        'hidden':1, # important!  make sure the report is hidden
                        'provenance':provenance
                    }
                    ] })[0]  
                    print('saved Report: '+pformat(report_info))

                    returnVal = { "report_name" : reportName,"report_ref" : str(report_info[6]) + '/' + str(report_info[0]) + '/' + str(report_info[4]) }

                    return [returnVal]


		else:
                      
		      self.__LOGGER.info(status)

		      outjson = status
		      self.__LOGGER.info('5')
		      with open("{0}/{1}".format(self.__SCRATCH , outjson),'r') as et2:

			eo2 = json.load(et2)
			genome_ref = s_res[0]['data']['genome_id']
			eo2['type']='log2_level'
			eo2['genome_ref'] = genome_ref
		        self.__LOGGER.info('3')
			self.__LOGGER.info(workspace + self.__SCRATCH + outjson + plot['exp'])
                        try:
                            res = ws_client.save_objects({'workspace' : workspace,
                                   'objects' : [{ 'type' : 'KBaseFeatureValues.ExpressionMatrix',
                                   'data' : eo2,
                                   'name' : plot['exp']
                             }]})
                        except:
                            self.__LOGGER ("xxxx6")

        except:
		self.__LOGGER.info('6')
        report = "Successfully created expression matrix"
        reportObj = {
             'objects_created':[],
             'text_message':report
              }

        self.__LOGGER.info('7')

	reportName = 'create_interactive_heatmap_de_genes_old_'+str(hex(uuid.getnode()))
	report_info = ws_client.save_objects({
	    'workspace':fparams['workspace'],
	    'objects':[
		 {
		  'type':'KBaseReport.Report',
		  'data':reportObj,
		  'name':reportName,
		  'meta':{},
		  'hidden':1, # important!  make sure the report is hidden
		  'provenance':provenance
		 }
	    ] })[0]  
	print('saved Report: '+pformat(report_info))

	returnVal = { "report_name" : reportName,"report_ref" : str(report_info[6]) + '/' + str(report_info[0]) + '/' + str(report_info[4]) }


        #END create_interactive_heatmap_de_genes_old

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method create_interactive_heatmap_de_genes_old return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

コード例 #6

ファイルを表示

ファイル: kb_cummerbundImpl.py プロジェクト: srividya22/kb_cummerbund

    def create_interactive_heatmap_de_genes(self, ctx,
                                            interactiveHeatmapParams):
        """
        Build a heatmap expression matrix from a cuffdiff object and save it.

        Fetches the workspace object named by ``ws_cuffdiff_id``, passes it to
        ``script_util2.extract_cuffdiff_data`` to obtain the cuffdiff
        directory, runs the ``script_util2`` filter / gene-list helpers on
        ``gene_exp.diff`` and then the ``heatmapplotinteractive.R`` plot step.
        The JSON file named by the plot step is loaded from scratch and saved
        as a ``KBaseFeatureValues.ExpressionMatrix`` called
        ``ws_expression_matrix_id``.

        :param ctx: context object; only ``ctx['token']`` is read here.
        :param interactiveHeatmapParams: dict. This method reads
            ``workspace_name``, ``ws_cuffdiff_id`` and
            ``ws_expression_matrix_id`` and writes ``cuffdiff_dir``,
            ``infile``, ``outfile``, ``cummerbundplotset``, ``title`` and
            ``description`` into it before handing it to the helpers (which
            may read further keys -- not visible from this method).
        :returns: ``[ws_expression_matrix_id]`` on the normal path; the two
            early exits return the bare id (see NOTE below).
        :raises ValueError: if the id to return is not a ``basestring``.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_interactive_heatmap_de_genes
        fparams = interactiveHeatmapParams
        # Bind the return value up front: the early exits below used to hit an
        # unbound local because it was only assigned at the very end.
        returnVal = fparams['ws_expression_matrix_id']

        #Set up workspace client
        user_token = ctx['token']
        workspace = fparams['workspace_name']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        system_params = {
            'token': user_token,
            'ws_url': self.__WS_URL,
            'logger': self.__LOGGER,
            'shock_url': self.__SHOCK_URL,
            'hs_url': self.__HS_URL,
            'scratch': self.__SCRATCH,
            'rscripts': self.__RSCRIPTS,
            'workspace': workspace,
        }

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name': fparams['ws_cuffdiff_id'],
            'workspace': fparams['workspace_name']
        }])

        #Check if workspace has data
        # NOTE(review): the early exits return the bare id, not a one-element
        # list like the normal path; this mirrors the sibling methods --
        # confirm which shape the caller expects.
        if not s_res:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = script_util2.extract_cuffdiff_data(
            self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)
        # Test for failure before building the log message: concatenating
        # False to a str raises TypeError.
        if cuffdiff_dir is False:
            self.__LOGGER.info("Extraction of cuffdiff data failed")
            return returnVal
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        fparams['cuffdiff_dir'] = cuffdiff_dir
        fparams['infile'] = join(cuffdiff_dir, "gene_exp.diff")
        fparams['outfile'] = join(system_params['scratch'],
                                  "gene_exp.diff.filter")

        filtered_matrix = script_util2.filter_expression_matrix(
            fparams, system_params)
        self.__LOGGER.info("matrix is " + filtered_matrix)

        # Second step works on the output of the filter step.
        fparams['infile'] = join(system_params['scratch'],
                                 "gene_exp.diff.filter")
        fparams['outfile'] = join(system_params['scratch'],
                                  "gene_exp.diff.filter.genelist")

        # The result of this call was never used; it is kept for whatever it
        # does with fparams['infile'] / fparams['outfile'] (presumably writes
        # the gene list file -- confirm in script_util2).
        script_util2.get_gene_list_from_filter_step(fparams)

        rparams = {
            'genelist': filtered_matrix,
            'cuffdiff_dir': fparams['cuffdiff_dir'],
            'outpng': join(system_params['scratch'], "heatmap.png"),
            'imageheight': 1600,
            'imagewidth': 800,
            'plotscript': join(system_params['rscripts'],
                               "heatmapplotinteractive.R"),
            'include_replicates': 1,
            'outmatrix': join(system_params['scratch'], "outmatrix"),
        }

        roptstr_basic_heatmap_rep = script_util2.get_command_line_heatmap_basic(
            rparams)

        # List of plots to generate
        plotlist = [{
            'roptstr': roptstr_basic_heatmap_rep,
            'title': "Heatmap",
            'description': "Heatmap",
            'exp': fparams['ws_expression_matrix_id']
        }]
        fparams['cummerbundplotset'] = []

        # Iterate through the plotlist and generate the images and json files.
        for plot in plotlist:
            fparams['title'] = plot['title']
            fparams['description'] = plot['description']

            status = script_util2.rplotanduploadinteractive(
                system_params, fparams, rparams, plot['roptstr'])
            if status == False:
                self.__LOGGER.info(
                    "Problem generating image and json file - " +
                    plot["roptstr"])
                continue

            self.__LOGGER.info(status)

            # On success the helper's return value is used as the name of a
            # JSON file inside the scratch directory.
            outjson = status
            with open("{0}/{1}".format(self.__SCRATCH, outjson),
                      'r') as et2:
                eo2 = json.load(et2)
            eo2['type'] = 'untransformed'

            self.__LOGGER.info(workspace + self.__SCRATCH + outjson +
                               plot['exp'])
            ws_client.save_objects({
                'workspace': workspace,
                'objects': [{
                    'type': 'KBaseFeatureValues.ExpressionMatrix',
                    'data': eo2,
                    'name': plot['exp']
                }]
            })

        #END create_interactive_heatmap_de_genes

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError(
                'Method create_interactive_heatmap_de_genes return value ' +
                'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

コード例 #7

ファイルを表示

ファイル: kb_cummerbundImpl.py プロジェクト: srividya22/kb_cummerbund

    def generate_cummerbund_plots(self, ctx, cummerbundParams):
        """
        Run the cummeRbund R plot scripts and save the plot set object.

        Fetches the workspace object named by ``ws_cuffdiff_id``, passes it to
        ``script_util2.extract_cuffdiff_data`` to obtain the cuffdiff
        directory, calls ``script_util2.rplotandupload`` once per entry of the
        plot list below, and saves a ``KBaseRNASeq.cummerbund_output`` object
        named ``ws_cummerbund_output``.

        :param ctx: context object; only ``ctx['token']`` is read here.
        :param cummerbundParams: dict read for ``workspace_name``,
            ``ws_cuffdiff_id`` and ``ws_cummerbund_output``.
        :returns: ``[ws_cummerbund_output]`` on the normal path; the two early
            exits return the bare name (see NOTE below).
        :raises ValueError: if the name to return is not a ``basestring``.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN generate_cummerbund_plots

        params = cummerbundParams
        returnVal = params['ws_cummerbund_output']

        #Set up workspace client
        user_token = ctx['token']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name': params['ws_cuffdiff_id'],
            'workspace': params['workspace_name']
        }])

        # Check if workspace has data
        # NOTE(review): the early exits return the bare name, not a
        # one-element list like the normal path -- unchanged from the
        # original; confirm which shape the caller expects.
        if not s_res:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = script_util2.extract_cuffdiff_data(
            self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)

        # The failure check has to come before the log line: concatenating
        # False to a str raises TypeError, which made this guard unreachable.
        if cuffdiff_dir is False:
            self.__LOGGER.info("Extraction of cuffdiff data failed")
            return returnVal
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Run R script to run cummerbund json and update the cummerbund output json file
        # Prepare output object.
        outputobject = dict()

        # Prepare output plot list; handed to rplotandupload on every call
        # (presumably the helper appends to it -- confirm in script_util2).
        cummerbundplotset = []

        # List of plots to generate
        plotlist = [{
            'file':
            "dispersionplot.R",
            'title':
            "Dispersion plot",
            'description':
            "Dispersion plot is the quality measure of the data. It estimates deviation from threshold against counts in FPKM."
        }, {
            'file':
            "fpkmscvplot.R",
            'title':
            "Genes CV plot",
            'description':
            "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data."
        }, {
            'file':
            "isoformscvplot.R",
            'title':
            "Isoform CV plot",
            'description':
            "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data.Differences in CV2 can result in lower numbers of differentially expressed isoforms due to a higher degree of variability between replicate fpkm estimates."
        }, {
            'file':
            "densityplot.R",
            'title':
            "Density plot",
            'description':
            "The density plot shows the distribution of FPKM scores across samples"
        }, {
            'file':
            "csdensityrepplot.R",
            'title':
            "Replicates density plot",
            'description':
            "The replicates density plot shows the distribution of FPKM scores across sample replicates"
        }, {
            'file':
            "boxplot.R",
            'title':
            "Box plots",
            'description':
            "The box plots show the FPKM distribution across samples."
        }, {
            'file':
            "boxrepplot.R",
            'title':
            "Box plots of replicates",
            'description':
            "The box plots of replicates show the FPKM distribution across sample replicates."
        }, {
            'file':
            "pairwisescatterplots.R",
            'title':
            "Pairwise scatter plots",
            'description':
            "The scatterplots show differences in gene expression between two samples. If two samples are identical, all genes will fall on the mid-line."
        }, {
            'file':
            "volcanomatrixplot.R",
            'title':
            "Volcano matrix plots",
            'description':
            "Volcano matrix plot is a scatter plot that also identifies differentially expressed genes (by color) between samples based on log2 fold change cut off."
        }, {
            'file':
            "pcaplot.R",
            'title':
            "PCA plot",
            'description':
            "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions."
        }, {
            'file':
            "pcarepplot.R",
            'title':
            "PCA plot including replicates",
            'description':
            "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions including replicates."
        }, {
            'file':
            "mdsplot.R",
            'title':
            "Multi-dimensional scaling plot",
            'description':
            "Multi-dimensional scaling plots are similar to PCA plots and useful for determining the major sources of variation in the dataset. "
        }, {
            'file':
            "mdsrepplot.R",
            'title':
            "Multi-dimensional scaling plot including replicates",
            'description':
            "Multi-dimensional scaling plot including replicates are  similar to PCA plots and useful for determining the major sources of variation in the dataset with replicates. These can be useful to determine any systematic bias that may be present between conditions."
        }]

        #TODO.. Giving Rplot.pdf
        #                { 'file': "dendrogramplot.R",
        #                  'title': "Dendrogram",
        #                  'description': "Dendrogram  based on the JS (Jensen-Shannon divergence) distance" },
        #
        #                { 'file': "dendrogramrepplot.R",
        #                  'title': "Dendrogram including replicates",
        #                  'description': "Dendrogram including replicates based on the JS (Jensen-Shannon divergence) distance" },

        # Iterate through the plotlist and generate the images and json files.
        # A failed plot is only logged; the remaining plots still run.
        for plot in plotlist:
            status = script_util2.rplotandupload(
                self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS, plot['file'],
                self.__SHOCK_URL, self.__HS_URL, user_token, cummerbundplotset,
                plot['title'], plot['description'], cuffdiff_dir)
            if status == False:
                self.__LOGGER.info(
                    "Problem generating image and json file - " + plot["file"])

        # Populate the output object
        outputobject['cummerbundplotSet'] = cummerbundplotset

        #TODO: Need to figure out how to get rnaseq experiment id
        outputobject['rnaseq_experiment_id'] = "rnaseq_experiment_id"
        outputobject['cuffdiff_input_id'] = params['ws_cuffdiff_id']

        ws_client.save_objects({
            "workspace":
            params['workspace_name'],
            "objects": [{
                "type": "KBaseRNASeq.cummerbund_output",
                "data": outputobject,
                "name": params["ws_cummerbund_output"]
            }]
        })

        #END generate_cummerbund_plots

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method generate_cummerbund_plots return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

コード例 #8

ファイルを表示

ファイル: kb_cummerbundImpl.py プロジェクト: srividya22/kb_cummerbund

    def create_expression_matrix(self, ctx, expressionMatrixParams):
        """
        Generate an FPKM expression matrix from a cuffdiff object and save it.

        Fetches the workspace object named by ``ws_cuffdiff_id``, passes it to
        ``script_util2.extract_cuffdiff_data`` to obtain the cuffdiff
        directory, runs ``fpkmgenematrix.R`` (or ``repfpkmgenematrix.R`` when
        ``include_replicates`` is not 0) through
        ``script_util2.generate_and_upload_expression_matrix``, then loads the
        JSON file it names from scratch and saves it as a
        ``KBaseFeatureValues.ExpressionMatrix`` with type ``untransformed``.

        :param ctx: context object; only ``ctx['token']`` is read here.
        :param expressionMatrixParams: dict read for ``workspace_name``,
            ``ws_cuffdiff_id``, ``ws_expression_matrix_id`` and
            ``include_replicates``.
        :returns: ``[ws_expression_matrix_id]`` on the normal path; the early
            exits return the bare id (see NOTE below).
        :raises ValueError: if the id to return is not a ``basestring``.
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_expression_matrix

        params = expressionMatrixParams
        returnVal = params['ws_expression_matrix_id']
        #Set up workspace client
        user_token = ctx['token']
        workspace = params['workspace_name']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)

        #Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name': params['ws_cuffdiff_id'],
            'workspace': params['workspace_name']
        }])

        # Check if workspace has data
        # NOTE(review): the early exits return the bare id, not a one-element
        # list like the normal path -- unchanged from the original; confirm
        # which shape the caller expects.
        if not s_res:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = script_util2.extract_cuffdiff_data(
            self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token)

        # The failure check has to come before the log line: concatenating
        # False to a str raises TypeError, which made this guard unreachable.
        if cuffdiff_dir is False:
            self.__LOGGER.info("Extraction of cuffdiff data failed")
            return returnVal
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Only the R script differs between the two modes; the helper call is
        # otherwise identical.
        if params['include_replicates'] == 0:
            scriptfile = "fpkmgenematrix.R"
        else:
            scriptfile = "repfpkmgenematrix.R"

        outjson = script_util2.generate_and_upload_expression_matrix(
            self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS, scriptfile,
            self.__SHOCK_URL, self.__HS_URL, user_token, cuffdiff_dir,
            self.__WS_URL, workspace)

        if outjson is False:
            self.__LOGGER.info("Creation of expression matrix failed")
            return returnVal

        # On success the helper's return value is used as the name of a JSON
        # file inside the scratch directory.
        with open("{0}/{1}".format(self.__SCRATCH, outjson), 'r') as et:
            eo = json.load(et)
        eo['type'] = 'untransformed'

        self.__LOGGER.info(workspace + self.__SCRATCH + outjson +
                           params['ws_expression_matrix_id'])
        ws_client.save_objects({
            'workspace':
            workspace,
            'objects': [{
                'type': 'KBaseFeatureValues.ExpressionMatrix',
                'data': eo,
                'name': params['ws_expression_matrix_id']
            }]
        })

        #END create_expression_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method create_expression_matrix return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

コード例 #9

ファイルを表示

ファイル: kb_cummerbundImpl.py プロジェクト: pranjan77/kb_cummerbund

    def create_interactive_heatmap_de_genes_old(self, ctx, heatmapParams):
        """
        Create a heatmap expression matrix for filtered genes and save a
        hidden KBaseReport.Report describing the outcome.

        :param heatmapParams: instance of type "heatmapParams" -> structure:
           parameter "sample1" of String, parameter "sample2" of String,
           parameter "q_value_cutoff" of Double, parameter
           "log2_fold_change_cutoff" of Double, parameter "num_genes" of
           Long, parameter "ws_cuffdiff_id" of type "ws_cuffdiff_id" (@id ws
           KBaseRNASeq.RNASeqCuffdiffdifferentialExpression), parameter
           "ws_expression_matrix_id1" of type "ws_expression_matrix_id" (@id
           ws KBaseFeatureValues.ExpressionMatrix), parameter
           "ws_expression_matrix_id2" of type "ws_expression_matrix_id" (@id
           ws KBaseFeatureValues.ExpressionMatrix), parameter
           "ws_cummerbund_output" of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output)
        :returns: instance of type "ResultsToReport" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String

        NOTE(review): the keys this implementation reads directly are
        "workspace_name", "ws_cuffdiff_id" and "ws_expression_matrix_id";
        the first and last are not in the structure described above --
        confirm against the spec. The dict is also mutated ("cuffdiff_dir",
        "infile", "outfile", "pairs", "logModetmp", "cummerbundplotset",
        "title", "description") before being passed to the script_util2
        helpers.

        Every exit path, including the failure paths, saves a report and
        returns a one-element list holding its name and reference.
        """
        # ctx is the context object
        # return variables are: returnVal
        # BEGIN create_interactive_heatmap_de_genes_old
        fparams = heatmapParams
        # Set up workspace client
        user_token = ctx["token"]
        workspace = fparams["workspace_name"]
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        system_params = {
            "token": user_token,
            "ws_url": self.__WS_URL,
            "logger": self.__LOGGER,
            "shock_url": self.__SHOCK_URL,
            "hs_url": self.__HS_URL,
            "scratch": self.__SCRATCH,
            "rscripts": self.__RSCRIPTS,
            "workspace": workspace,
        }

        # Provenance is prepared first because every exit path below saves a
        # report that carries it.
        provenance = [{}]
        if "provenance" in ctx:
            provenance = ctx["provenance"]
        # add additional info to provenance here, in this case the input data object reference
        provenance[0]["input_ws_objects"] = [workspace + "/" + fparams["ws_cuffdiff_id"]]

        # The original used two different report-name prefixes; both are kept.
        early_prefix = "create_interactive_heatmap_de_genes_old_"
        final_prefix = "create_interactive_heatmap_de_genes_"

        def failure_report(text):
            # Report payload for a run that created no objects.
            return {"objects_created": [], "text_message": text}

        def save_report(name_prefix, report_data):
            # Save report_data as a hidden KBaseReport.Report and build the
            # {"report_name", "report_ref"} dict this method returns.
            report_name = name_prefix + str(hex(uuid.getnode()))
            report_info = ws_client.save_objects(
                {
                    "workspace": fparams["workspace_name"],
                    "objects": [
                        {
                            "type": "KBaseReport.Report",
                            "data": report_data,
                            "name": report_name,
                            "meta": {},
                            "hidden": 1,  # important!  make sure the report is hidden
                            "provenance": provenance,
                        }
                    ],
                }
            )[0]
            print("saved Report: " + pformat(report_info))
            return {
                "report_name": report_name,
                "report_ref": str(report_info[6]) + "/" + str(report_info[0]) + "/" + str(report_info[4]),
            }

        # Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{"name": fparams["ws_cuffdiff_id"], "workspace": fparams["workspace_name"]}])

        # Check if workspace has data. This exit used to return an unbound
        # local; it now reports the problem like the other failure paths.
        if not s_res:
            message = "Workspace did not return any objects"
            self.__LOGGER.info(message)
            return [save_report(early_prefix, failure_report(message))]

        cuffdiff_dir = script_util2.extract_cuffdiff_data(
            self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token
        )
        # Test for failure before building the log message: concatenating
        # False to a str raises TypeError.
        if cuffdiff_dir is False:
            message = "Extraction of cuffdiff data failed"
            self.__LOGGER.info(message)
            return [save_report(early_prefix, failure_report(message))]
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        fparams["cuffdiff_dir"] = cuffdiff_dir
        fparams["infile"] = join(cuffdiff_dir, "gene_exp.diff")
        fparams["outfile"] = join(system_params["scratch"], "gene_exp.diff.filter")

        fparams["pairs"] = 1
        fparams["logModetmp"] = 2

        rparams = {
            "cuffdiff_dir": fparams["cuffdiff_dir"],
            "outpng": join(system_params["scratch"], "heatmap.png"),
            "imageheight": 1600,
            "imagewidth": 800,
            "plotscript": join(system_params["rscripts"], "heatmapplotinteractive.R"),
            "include_replicates": 1,
            "pairs": fparams["pairs"],
            "logMode": fparams["logModetmp"],
            "removezeroes": 1,
            "outmatrix": join(system_params["scratch"], "outmatrix"),
        }

        if fparams["pairs"] != 0:
            try:
                filtered_matrix = script_util2.filter_expression_matrix(fparams, system_params)
                self.__LOGGER.info("matrix is " + filtered_matrix)
                fparams["infile"] = join(system_params["scratch"], "gene_exp.diff.filter")
                fparams["outfile"] = join(system_params["scratch"], "gene_exp.diff.filter.genelist")
                # Result unused; the call is kept for whatever it does with
                # fparams["infile"] / fparams["outfile"].
                script_util2.get_gene_list_from_filter_step(fparams)
                rparams["genelist"] = filtered_matrix
            except Exception as err:
                # Exception (not a bare except) so KeyboardInterrupt and
                # SystemExit still propagate; the cause is logged instead of
                # being dropped.
                self.__LOGGER.info("Filtering the expression matrix failed: " + repr(err))
                # The three sentences used to be glued together without any
                # separator.
                report = ". ".join(
                    [
                        "There was an error in creating expression matrix",
                        "No differentially expressed genes were found",
                        "Please change / double check  your filtering criteria",
                    ]
                )
                return [save_report(early_prefix, failure_report(report))]

        # Start from the failure payload so that a plot step returning False
        # no longer leads to saving a report with empty data.
        reportObj = failure_report("There was an error in generating expression matrix")
        try:
            roptstr_basic_heatmap_rep = script_util2.get_command_line_heatmap_basic(rparams)

            # List of plots to generate
            plotlist = [
                {
                    "roptstr": roptstr_basic_heatmap_rep,
                    "title": "Heatmap",
                    "description": "Heatmap",
                    "exp": fparams["ws_expression_matrix_id"],
                }
            ]
            fparams["cummerbundplotset"] = []

            # Iterate through the plotlist and generate the images and json files.
            for plot in plotlist:
                fparams["title"] = plot["title"]
                fparams["description"] = plot["description"]

                status = script_util2.rplotanduploadinteractive(system_params, fparams, rparams, plot["roptstr"])
                if status == False:
                    self.__LOGGER.info("Problem generating image and json file - " + plot["roptstr"])
                    continue

                self.__LOGGER.info(status)

                # On success the helper's return value is used as the name of
                # a JSON file inside the scratch directory.
                outjson = status
                with open("{0}/{1}".format(self.__SCRATCH, outjson), "r") as et2:
                    eo2 = json.load(et2)
                eo2["type"] = "log2_level"
                eo2["genome_ref"] = s_res[0]["data"]["genome_id"]

                self.__LOGGER.info(workspace + self.__SCRATCH + outjson + plot["exp"])
                res = ws_client.save_objects(
                    {
                        "workspace": workspace,
                        "objects": [
                            {"type": "KBaseFeatureValues.ExpressionMatrix", "data": eo2, "name": plot["exp"]}
                        ],
                    }
                )

                info = res[0]
                # Was self.__LOGGER(...): calling the logger object itself
                # raised, and the handler below then replaced the success
                # report even though the matrix had already been saved.
                self.__LOGGER.info("done uploading exp")
                reportObj = {
                    "objects_created": [
                        {
                            "ref": str(info[6]) + "/" + str(info[0]) + "/" + str(info[4]),
                            "description": "Expression matrix",
                        }
                    ],
                    "text_message": "Successfully created expression matrix",
                }

        except Exception as err:
            self.__LOGGER.info("Generating the expression matrix failed: " + repr(err))
            reportObj = failure_report("There was an error in generating expression matrix")

        returnVal = save_report(final_prefix, reportObj)

        # END create_interactive_heatmap_de_genes_old

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError(
                "Method create_interactive_heatmap_de_genes_old return value "
                + "returnVal is not type dict as required."
            )
        # return the results
        return [returnVal]

コード例 #10

ファイルを表示

ファイル: kb_cummerbundImpl.py プロジェクト: pranjan77/kb_cummerbund

    def create_expression_matrix(self, ctx, expressionMatrixParams):
        """
        Generate an FPKM expression matrix from a cuffdiff object and save it.

        The JSON file named by
        ``script_util2.generate_and_upload_expression_matrix`` is loaded from
        scratch, tagged with type "untransformed" and the cuffdiff object's
        "genome_id" as "genome_ref", and saved as a
        KBaseFeatureValues.ExpressionMatrix.

        :param expressionMatrixParams: instance of type
           "expressionMatrixParams" -> structure: parameter "workspace_name"
           of type "workspace_name" (workspace name of the object), parameter
           "ws_cuffdiff_id" of type "ws_cuffdiff_id" (@id ws
           KBaseRNASeq.RNASeqCuffdiffdifferentialExpression), parameter
           "ws_expression_matrix_id" of type "ws_expression_matrix_id" (@id
           ws KBaseFeatureValues.ExpressionMatrix), parameter
           "include_replicates" of type "bool" (indicates true or false
           values, false <= 0, true >=1)
        :returns: instance of type "ws_expression_matrix_id" (@id ws
           KBaseFeatureValues.ExpressionMatrix)
        :raises ValueError: if the id to return is not a basestring.
        """
        # ctx is the context object
        # return variables are: returnVal
        # BEGIN create_expression_matrix

        params = expressionMatrixParams
        returnVal = params["ws_expression_matrix_id"]
        # Set up workspace client
        user_token = ctx["token"]
        workspace = params["workspace_name"]
        ws_client = Workspace(url=self.__WS_URL, token=user_token)

        # Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{"name": params["ws_cuffdiff_id"], "workspace": params["workspace_name"]}])

        # Check if workspace has data
        # NOTE(review): the early exits return the bare id, not a one-element
        # list like the normal path -- unchanged from the original; confirm
        # which shape the caller expects.
        if not s_res:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        cuffdiff_dir = script_util2.extract_cuffdiff_data(
            self.__LOGGER, self.__SHOCK_URL, self.__SCRATCH, s_res, user_token
        )

        # The failure check has to come before the log line: concatenating
        # False to a str raises TypeError, which made this guard unreachable.
        if cuffdiff_dir is False:
            self.__LOGGER.info("Extraction of cuffdiff data failed")
            return returnVal
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Only the R script differs between the two modes; the helper call is
        # otherwise identical.
        if params["include_replicates"] == 0:
            scriptfile = "fpkmgenematrix.R"
        else:
            scriptfile = "repfpkmgenematrix.R"

        outjson = script_util2.generate_and_upload_expression_matrix(
            self.__LOGGER,
            self.__SCRATCH,
            self.__RSCRIPTS,
            scriptfile,
            self.__SHOCK_URL,
            self.__HS_URL,
            user_token,
            cuffdiff_dir,
            self.__WS_URL,
            workspace,
        )

        if outjson is False:
            self.__LOGGER.info("Creation of expression matrix failed")
            return returnVal

        # On success the helper's return value is used as the name of a JSON
        # file inside the scratch directory.
        with open("{0}/{1}".format(self.__SCRATCH, outjson), "r") as et:
            eo = json.load(et)
        eo["type"] = "untransformed"
        eo["genome_ref"] = s_res[0]["data"]["genome_id"]

        self.__LOGGER.info(workspace + self.__SCRATCH + outjson + params["ws_expression_matrix_id"])
        ws_client.save_objects(
            {
                "workspace": workspace,
                "objects": [
                    {
                        "type": "KBaseFeatureValues.ExpressionMatrix",
                        "data": eo,
                        "name": params["ws_expression_matrix_id"],
                    }
                ],
            }
        )

        # END create_expression_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError(
                "Method create_expression_matrix return value " + "returnVal is not type basestring as required."
            )
        # return the results
        return [returnVal]

コード例 #11

ファイルを表示

    def create_interactive_heatmap_de_genes_old(self, ctx, heatmapParams):
        """
        Build the heatmap expression matrix for a cuffdiff result and save a
        hidden KBaseReport.Report describing the outcome.

        The cuffdiff data referenced by "ws_cuffdiff_id" is extracted, its
        gene_exp.diff file is passed through the script_util2 filter step, the
        heatmapplotinteractive.R plot step is run, and the JSON it produces is
        saved as a KBaseFeatureValues.ExpressionMatrix named by
        "ws_expression_matrix_id". Every failure path saves a report carrying
        an error message instead of the success message.

        NOTE: heatmapParams is modified in place (working keys such as
        'cuffdiff_dir', 'infile', 'outfile', 'pairs', 'logModetmp',
        'cummerbundplotset', 'title' and 'description' are added).

        :param heatmapParams: instance of type "heatmapParams" -> structure:
           parameter "workspace" of String, parameter "sample1" of String,
           parameter "sample2" of String, parameter "q_value_cutoff" of
           Double, parameter "log2_fold_change_cutoff" of Double, parameter
           "num_genes" of Long, parameter "ws_cuffdiff_id" of type
           "ws_cuffdiff_id" (@id ws
           KBaseRNASeq.RNASeqCuffdiffdifferentialExpression), parameter
           "ws_expression_matrix_id" of type "ws_expression_matrix_id" (@id
           ws KBaseFeatureValues.ExpressionMatrix), parameter
           "ws_cummerbund_output" of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output)
        :returns: instance of type "ResultsToReport" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN create_interactive_heatmap_de_genes_old
        fparams = heatmapParams

        # Set up workspace client
        user_token = ctx['token']
        workspace = fparams['workspace']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        scratch = self.__SCRATCH
        system_params = {
            'token': user_token,
            'ws_url': self.__WS_URL,
            'logger': self.__LOGGER,
            'shock_url': self.__SHOCK_URL,
            'hs_url': self.__HS_URL,
            'scratch': scratch,
            'rscripts': self.__RSCRIPTS,
            'workspace': workspace,
        }

        # Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name': fparams['ws_cuffdiff_id'],
            'workspace': workspace
        }])

        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']
        # add additional info to provenance here, in this case the input data object reference
        provenance[0]['input_ws_objects'] = [workspace + '/' + fparams['ws_cuffdiff_id']]

        def save_report(message):
            # Save a hidden KBaseReport.Report whose text is `message` and
            # return the {"report_name", "report_ref"} dict built from the
            # object info tuple that save_objects hands back.
            report_name = 'create_interactive_heatmap_de_genes_old_' + str(hex(uuid.getnode()))
            report_info = ws_client.save_objects({
                'workspace': workspace,
                'objects': [
                    {
                        'type': 'KBaseReport.Report',
                        'data': {
                            'objects_created': [],
                            'text_message': message
                        },
                        'name': report_name,
                        'meta': {},
                        'hidden': 1,  # important!  make sure the report is hidden
                        'provenance': provenance
                    }
                ]})[0]
            print('saved Report: ' + pformat(report_info))
            return {"report_name": report_name,
                    "report_ref": str(report_info[6]) + '/' + str(report_info[0]) + '/' + str(
                        report_info[4])}

        # Check if workspace has data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return [save_report("Error: Workspace did not return any objects")]

        cuffdiff_dir = script_util2.extract_cuffdiff_data(self.__LOGGER, self.__SHOCK_URL,
                                                          scratch, s_res, user_token)
        # The other callers of extract_cuffdiff_data in this file treat False
        # as its failure value; test for it before concatenating it to a string.
        if cuffdiff_dir is False:
            self.__LOGGER.info("Could not extract cuffdiff data")
            return [save_report("Error: could not extract the cuffdiff data of " +
                                fparams['ws_cuffdiff_id'] + ".")]
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        fparams['cuffdiff_dir'] = cuffdiff_dir
        fparams['infile'] = join(cuffdiff_dir, "gene_exp.diff")
        fparams['outfile'] = join(scratch, "gene_exp.diff.filter")
        fparams['pairs'] = 1
        fparams['logModetmp'] = 2

        rparams = {
            'cuffdiff_dir': cuffdiff_dir,
            'outpng': join(scratch, "heatmap.png"),
            'imageheight': 1600,
            'imagewidth': 800,
            'plotscript': join(system_params['rscripts'], "heatmapplotinteractive.R"),
            'include_replicates': 1,
            'pairs': fparams['pairs'],
            'logMode': fparams['logModetmp'],
            'removezeroes': 1,
            'outmatrix': join(scratch, "outmatrix"),
        }

        # Filter step. 'pairs' is always 1 here, so this step always runs.
        try:
            filtered_matrix = script_util2.filter_expression_matrix(fparams, system_params)
            self.__LOGGER.info("matrix is " + filtered_matrix)
            fparams['infile'] = join(scratch, "gene_exp.diff.filter")
            fparams['outfile'] = join(scratch, "gene_exp.diff.filter.genelist")
            script_util2.get_gene_list_from_filter_step(fparams)
            rparams['genelist'] = filtered_matrix
        except Exception as e:
            self.__LOGGER.info("Filtering the expression matrix failed: " + str(e))
            return [save_report(
                "There was an error in creating expression matrix. "
                "No differentially expressed genes were found. "
                "Please change / double check  your filtering criteria")]

        # Plot step. Any failure is recorded in error_message so that the
        # final report never claims success after something went wrong.
        error_message = None
        try:
            roptstr_basic_heatmap_rep = script_util2.get_command_line_heatmap_basic(rparams)

            # List of plots to generate
            plotlist = [
                {'roptstr': roptstr_basic_heatmap_rep,
                 'title': "Heatmap",
                 'description': "Heatmap",
                 'exp': fparams['ws_expression_matrix_id']
                 }
            ]
            fparams['cummerbundplotset'] = []

            # Iterate through the plotlist and generate the images and json files.
            for plot in plotlist:
                fparams['title'] = plot['title']
                fparams['description'] = plot['description']

                status = script_util2.rplotanduploadinteractive(system_params, fparams, rparams,
                                                                plot['roptstr'])
                if status == False:
                    self.__LOGGER.info(
                        "Problem generating image and json file - " + plot["roptstr"])
                    error_message = (
                        "Error: Please select a different cutoff criteria. None of the genes passed fold change and q-value-cutoff. "
                        "Failed to create expression  matrix with differentially expressed genes(" +
                        fparams['ws_expression_matrix_id'] + "). No genes to show on heatmap.")
                    break

                # On success `status` is used as the name of the JSON file
                # written under the scratch directory.
                self.__LOGGER.info(status)
                with open("{0}/{1}".format(scratch, status), 'r') as matrix_file:
                    matrix = json.load(matrix_file)
                matrix['type'] = 'log2_level'
                matrix['genome_ref'] = s_res[0]['data']['genome_id']
                self.__LOGGER.info(workspace + scratch + status + plot['exp'])
                ws_client.save_objects({
                    'workspace': workspace,
                    'objects': [{
                        'type': 'KBaseFeatureValues.ExpressionMatrix',
                        'data': matrix,
                        'name': plot['exp']
                    }]})
        except Exception as e:
            self.__LOGGER.info("Failed to create the heatmap expression matrix: " + str(e))
            error_message = "Error: failed to create expression matrix: " + str(e)

        if error_message is None:
            returnVal = save_report("Successfully created expression matrix")
        else:
            returnVal = save_report(error_message)

        #END create_interactive_heatmap_de_genes_old

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method create_interactive_heatmap_de_genes_old return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

コード例 #12

ファイルを表示

    def generate_cummerbund_plot2(self, ctx, cummerbundstatParams):
        """
        Generate the cummerbund plot set and the differential expression
        statistics for a cuffdiff result and save both to the workspace.

        The cuffdiff object is fetched, a feature-id -> function lookup is
        built from the referenced genome, one plot is requested from
        script_util2.rplotandupload per R script in the plot list, and the
        collected plot set is saved as a KBaseRNASeq.cummerbund_output object.
        gene_exp.diff is then handed to v.volcano_plot_data_parse_and_upload
        and the JSON read back from its output file is saved as a
        KBaseRNASeq.DifferentialExpressionStat object.

        :param cummerbundstatParams: instance of type "cummerbundstatParams"
           -> structure: parameter "workspace" of String, parameter
           "ws_cuffdiff_id" of type "ws_cuffdiff_id" (@id ws
           KBaseRNASeq.RNASeqCuffdiffdifferentialExpression), parameter
           "ws_cummerbund_output" of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output), parameter "ws_diffstat_output" of
           type "ws_diffstat_output" (Differential stat workspace id)
        :returns: instance of type "ws_cummerbund_output" (@id ws
           KBaseRNASeq.cummerbund_output)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN generate_cummerbund_plot2
        params = cummerbundstatParams
        returnVal = params['ws_cummerbund_output']

        # Set up workspace client
        user_token = ctx['token']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)

        # Read the input cuffdiff workspace object json file and get filehandle for cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name': params['ws_cuffdiff_id'],
            'workspace': params['workspace']
        }])

        # Check if workspace has data. This must happen before s_res[0] is
        # touched, otherwise an empty result raises IndexError.
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return [returnVal]

        print("Getting genome info")
        genome_ref = s_res[0]['data']['genome_id']
        print(genome_ref)
        gaapi = GenomeAnnotationAPI(self.callbackURL, token=user_token)
        genome = gaapi.get_genome_v1({"genomes": [{"ref": genome_ref}],
                                      "included_fields": ["scientific_name"],
                                      "included_feature_fields": ["id", "function", "type"
                                                                  ]})["genomes"][0]["data"]

        # Map each feature id to its function; a missing or empty function
        # is recorded as 'Unknown'.
        genome_dict = {}
        for feature in genome['features']:
            genome_dict[feature['id']] = feature.get('function') or 'Unknown'

        cuffdiff_dir = script_util2.extract_cuffdiff_data(self.__LOGGER, self.__SHOCK_URL,
                                                          self.__SCRATCH, s_res, user_token)
        # False is the failure value; test for it before concatenating the
        # result to a string, which would raise TypeError.
        if cuffdiff_dir is False:
            self.__LOGGER.info("Could not extract cuffdiff data")
            return [returnVal]
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Run R script to run cummerbund json and update the cummerbund output json file
        # Prepare output object.
        outputobject = dict()

        # Prepare output plot list
        cummerbundplotset = []
        # List of plots to generate
        plotlist = [
            {'file': "dispersionplot.R",
             'title': "Dispersion plot",
             'description': "Dispersion plot is the quality measure of the data. It estimates deviation from threshold against counts in FPKM."},

            {'file': "fpkmscvplot.R",
             'title': "Genes CV plot",
             'description': "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data."},

            {'file': "isoformscvplot.R",
             'title': "Isoform CV plot",
             'description': "The squared coefficient of variation plot is a normalized measure of cross-replicate variability that can be useful for evaluating the quality of RNA-seq data.Differences in CV2 can result in lower numbers of differentially expressed isoforms due to a higher degree of variability between replicate fpkm estimates."},

            {'file': "densityplot.R",
             'title': "Density plot",
             'description': "The density plot shows the distribution of FPKM scores across samples"},

            {'file': "csdensityrepplot.R",
             'title': "Replicates density plot",
             'description': "The replicates density plot shows the distribution of FPKM scores across sample replicates"},

            {'file': "boxplot.R",
             'title': "Box plots",
             'description': "The box plots show the FPKM distribution across samples."},

            {'file': "boxrepplot.R",
             'title': "Box plots of replicates",
             'description': "The box plots of replicates show the FPKM distribution across sample replicates."},

            {'file': "pairwisescatterplots.R",
             'title': "Pairwise scatter plots",
             'description': "The scatterplots show differences in gene expression between two samples. If two samples are identical, all genes will fall on the mid-line."},

            {'file': "volcanomatrixplot.R",
             'title': "Volcano matrix plots",
             'description': "Volcano matrix plot is a scatter plot that also identifies differentially expressed genes (by color) between samples based on log2 fold change cut off."},

            {'file': "pcaplot.R",
             'title': "PCA plot",
             'description': "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions."},

            {'file': "pcarepplot.R",
             'title': "PCA plot including replicates",
             'description': "Principal Component Analysis (PCA) is an informative approach for dimensionality reduction for exploring teh relationship between sample conditions including replicates."},

            {'file': "mdsplot.R",
             'title': "Multi-dimensional scaling plot",
             'description': "Multi-dimensional scaling plots are similar to PCA plots and useful for determining the major sources of variation in the dataset. "},

            {'file': "mdsrepplot.R",
             'title': "Multi-dimensional scaling plot including replicates",
             'description': "Multi-dimensional scaling plot including replicates are  similar to PCA plots and useful for determining the major sources of variation in the dataset with replicates. These can be useful to determine any systematic bias that may be present between conditions."}
        ]

        # Iterate through the plotlist and generate the images and json files.
        # A failed plot is only logged; the remaining plots are still attempted.
        for plot in plotlist:
            status = script_util2.rplotandupload(self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS,
                                                 plot['file'], self.__SHOCK_URL, self.__HS_URL,
                                                 user_token,
                                                 cummerbundplotset, plot['title'],
                                                 plot['description'], cuffdiff_dir)
            if status == False:
                self.__LOGGER.info("Problem generating image and json file - " + plot["file"])

        # Populate the output object
        outputobject['cummerbundplotSet'] = cummerbundplotset

        # TODO: Need to figure out how to get rnaseq experiment id
        outputobject['rnaseq_experiment_id'] = "rnaseq_experiment_id"
        outputobject['cuffdiff_input_id'] = params['ws_cuffdiff_id']

        ws_client.save_objects({
            "workspace": params['workspace'],
            "objects": [{
                "type": "KBaseRNASeq.cummerbund_output",
                "data": outputobject,
                "name": params["ws_cummerbund_output"]}]
        })

        # The volcano-plot helper is given an output path; the JSON read back
        # from that path is what gets saved as the differential stat object.
        infile = join(cuffdiff_dir, "gene_exp.diff")
        outfile = join(cuffdiff_dir, "gene_exp_diff.out")
        v.volcano_plot_data_parse_and_upload(infile, outfile, genome_dict)
        with open(outfile) as f:
            statdata = json.load(f)
        ws_client.save_objects({
            "workspace": params['workspace'],
            "objects": [{
                "type": "KBaseRNASeq.DifferentialExpressionStat",
                "data": statdata,
                "name": params["ws_diffstat_output"]}]
        })

        #END generate_cummerbund_plot2

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method generate_cummerbund_plot2 return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]