Ejemplo n.º 1
0
    def prepare(self): 
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        tophat_dir = self.directory

        try:
               #sample ,bowtie_index = ws_client.get_objects(
               #                         [{'name' : params['sampleset_id'],'workspace' : params['ws_id']},
               #                          { 'name' : params['bowtie_index'], 'workspace' : params['ws_id']}])
               sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'],params['sampleset_id'])[0]
               bowtie_index = script_util.ws_get_obj(logger, ws_client, params['ws_id'],params['bowtie_index'])[0]
               self.sample = sample
        except Exception,e:
               logger.exception("".join(traceback.format_exc()))
               raise ValueError(" Error Downloading objects from the workspace ")
Ejemplo n.º 2
0
    def prepare(self): 
        # for quick testing, we recover parameters here
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        hisat2_dir = self.directory

        try:
               #sample,annotation_name = ws_client.get_objects(
               #                         [{ 'name' : params['sampleset_id'], 'workspace' : params['ws_id']},
               #                         { 'name' : params['genome_id'], 'workspace' : params['ws_id']}])
               sample = script_util.ws_get_obj(logger, ws_client, params['ws_id'],params['sampleset_id'])[0]
               annotation_name = script_util.ws_get_obj(logger, ws_client, params['ws_id'],params['genome_id'])[0]
               self.sample =sample 
        except Exception,e:
               logger.exception("".join(traceback.format_exc()))
               raise ValueError(" Error Downloading objects from the workspace ")
Ejemplo n.º 3
0
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['hisat2_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
            #r_sample = ws_client.get_objects(
            #                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
            #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            #sample_type = r_sample_info[2].split('-')[0]
            r_sample = script_util.ws_get_obj(self.logger, ws_client, ws_id,
                                              read_sample)[0]
            sample_type = script_util.ws_get_type_name(self.logger, ws_client,
                                                       ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(
                self.logger, ws_client, ws_id, read_sample)
            input_direc = os.path.join(
                directory,
                sample_name.split('.')[0] + "_hisat2_input")
            if not os.path.exists(input_direc): os.mkdir(input_direc)
            output_name = sample_name.split('.')[0] + "_hisat2_alignment"
            output_dir = os.path.join(directory, output_name)
            if not os.path.exists(output_dir): os.mkdir(output_dir)
            print directory
            base = handler_util.get_file_with_suffix(directory, ".1.ht2")
            print base
            hisat2_base = os.path.join(directory, base)
            ### Adding advanced options to Bowtie2Call
            hisat2_cmd = ''
            hisat2_cmd += (' -p {0}'.format(self.num_threads))
            if ('quality_score' in params
                    and params['quality_score'] is not None):
                hisat2_cmd += (' --' + params['quality_score'])
            if ('alignment_type' in params
                    and params['alignment_type'] is not None):
                hisat2_cmd += (' --' + params['alignment_type'])
            if ('trim5' in params and params['trim5'] is not None):
                hisat2_cmd += (' --trim5 ' + str(params['trim5']))
            if ('trim3' in params and params['trim3'] is not None):
                hisat2_cmd += (' --trim3 ' + str(params['trim3']))
            if ('np' in params and params['np'] is not None):
                hisat2_cmd += (' --np ' + str(params['np']))
            if ('minins' in params and params['minins'] is not None):
                hisat2_cmd += (' --minins ' + str(params['minins']))
            if ('maxins' in params and params['maxins'] is not None):
                hisat2_cmd += (' --maxins ' + str(params['maxins']))
            #if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
            if ('min_intron_length' in params
                    and params['min_intron_length'] is not None):
                hisat2_cmd += (' --min-intronlen ' +
                               str(params['min_intron_length']))
            if ('max_intron_length' in params
                    and params['max_intron_length'] is not None):
                hisat2_cmd += (' --max-intronlen ' +
                               str(params['max_intron_length']))
            if ('no_spliced_alignment' in params
                    and params['no_spliced_alignment'] != 0):
                hisat2_cmd += (' --no-spliced-alignment')
            if ('transcriptome_mapping_only' in params
                    and params['transcriptome_mapping_only'] != 0):
                hisat2_cmd += (' --transcriptome-mapping-only')
            if ('tailor_alignments' in params
                    and params['tailor_alignments'] is not None):
                hisat2_cmd += (' --' + params['tailor_alignments'])
            out_file = output_dir + "/accepted_hits.sam"
            ####
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client,
                                                    ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref,
                                                   input_direc, token)
                self.logger.info(ds)
            except Exception, e:
                self.logger.exception(e)
                raise Exception(
                    "Unable to download reads file , {0}".format(read_sample))
            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                hisat2_cmd += " -U {0} -x {1} -S {2}".format(
                    ds['fwd'], hisat2_base, out_file)
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        hisat2_cmd += (' --' + params['orientation'])
                else:
                    # TODO: the following can be read from PEL object
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        hisat2_cmd += (' --' + params['orientation'])
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    ds['fwd'], ds['rev'], hisat2_base, out_file)
            #if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
            #        lib_type = 'SingleEnd'
            #        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            #            read_id = r_sample['data']['handle']['id']
            #            read_name =  r_sample['data']['handle']['file_name']
            #        else:
            #            read_id = r_sample['data']['lib']['file']['id']
            #            read_name =  r_sample['data']['lib']['file']['file_name']
            #        try:
            #                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=input_direc,token=token)
            #                hisat2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc,read_name),hisat2_base,out_file)
            #        except Exception,e:
            #                self.logger.exception(e)
            #                raise Exception( "Unable to download shock file , {0}".format(read_name))
            #if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
            #        lib_type = 'PairedEnd'
            #        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            #            if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
            #            read1_id = r_sample['data']['handle_1']['id']
            #            read1_name = r_sample['data']['handle_1']['file_name']
            #            read2_id = r_sample['data']['handle_2']['id']
            #            read2_name = r_sample['data']['handle_2']['file_name']
            #        else:
            #            # TODO: the following can be read from PEL object
            #            if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
            #            read1_id = r_sample['data']['lib1']['file']['id']
            #            read1_name = r_sample['data']['lib1']['file']['file_name']
            #            read2_id = r_sample['data']['lib2']['file']['id']
            #            read2_name = r_sample['data']['lib2']['file']['file_name']
            #        try:
            #                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token)
            #                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token)
            #                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(input_direc,read2_name),hisat2_base,out_file)
            #        except Exception,e:
            #                logger.exception(e)
            #                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
            try:
                self.logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
                cmdline_output = script_util.runProgram(
                    self.logger, "hisat2", hisat2_cmd, None, directory)
            except Exception, e:
                logger.exception(e)
                raise Exception("Failed to run command {0}".format(hisat2_cmd))
Ejemplo n.º 4
0
    def collect(self):
        params = self.method_params
        ws_client = self.common_params['ws_client']
        hs_client = self.common_params['hs_client']

        ws_id = params['ws_id']
        #rscripts_dir = self.common_params['rscripts_dir']
        rscripts_dir = '/kb/module/rscripts'

        token = self.common_params['user_token']
        diffexp_dir = self.directory
        logger = self.logger
        logger.info(
            'in DiffExpforBallgown.collect, method params (params) are')
        logger.info(pformat(params))
        output_object_name = params['output_obj_name']
        output_csv = "ballgown_diffexp.tsv"
        volcano_plot_file = "volcano_plot.png"
        stringtie_dir_prefix = "StringTie_outdir_"

        #
        #  1) need a pattern RE to match all the StringTie subdirs, so prefix all
        #     unzipped dirs with "stringtie_out_"
        #  2) need a group identifier string i.e. "111000"
        #

        ballgown_set_info = rnaseq_util.get_info_and_download_for_ballgown(
            logger, ws_client, hs_client, ws_id, self.urls, diffexp_dir,
            stringtie_dir_prefix, params['expressionset_id'], token)
        logger.info('back from download_for_ballgown(), ballgown_set_info are')
        logger.info(pformat(ballgown_set_info))

        sample_dir_group_file = "sample_dir_group_table"  # output file
        group_list = rnaseq_util.create_sample_dir_group_file(
            logger, ws_client, ws_id, ballgown_set_info['subdirs'],
            params['group_name1'], params['expr_ids1'], params['group_name2'],
            params['expr_ids2'], sample_dir_group_file)

        ballgown_output_dir = os.path.join(diffexp_dir, "ballgown_out")
        logger.info("ballgown output dir is {0}".format(ballgown_output_dir))
        handler_util.setupWorkingDir(logger, ballgown_output_dir)

        logger.info("about to run_ballgown_diff_exp")
        rnaseq_util.run_ballgown_diff_exp(logger, rscripts_dir, diffexp_dir,
                                          sample_dir_group_file,
                                          ballgown_output_dir, output_csv,
                                          volcano_plot_file)

        logger.info(
            "back from run_ballgown_diff_exp, about to load diff exp matrix file"
        )
        diff_expr_matrix = rnaseq_util.load_diff_expr_matrix(
            ballgown_output_dir, output_csv)  # read file before its zipped

        logger.info("about to load ballgout output into workspace")
        de_ws_save_obj_data = rnaseq_util.load_ballgown_output_into_ws(
            logger,
            ws_id,
            ws_client,
            hs_client,
            token,
            diffexp_dir,
            ballgown_output_dir,
            self.details["used_tool"],
            self.details["tool_version"],
            ballgown_set_info[
                'sample_expression_ids'],  # for sample ids? Is this good?
            group_list,  # conditions
            ballgown_set_info['genome_id'],  # genome_id
            ballgown_set_info['expressionset_id'],  # expressionset_id
            ballgown_set_info['alignmentSet_id'],  # alignmentset_id
            ballgown_set_info['sampleset_id'],  # sampleset_id
            output_object_name)
        logger.info(
            "back from loading ballgown output into workspace, object save data is "
        )
        logger.info(pformat(de_ws_save_obj_data))

        max_num_genes = sys.maxint  # default
        if 'maximum_num_genes' in params:
            if (params['maximum_num_genes'] != None):
                max_num_genes = params['maximum_num_genes']

        # this returns a list of gene ids passing the specified cuts, ordered by
        # descending fold_change
        selected_gene_list = rnaseq_util.filter_genes_diff_expr_matrix(
            diff_expr_matrix, params['fold_scale_type'],
            params['alpha_cutoff'], params['fold_change_cutoff'],
            max_num_genes)
        #  !!!!! IF selected_gene_list is empty print some kind of message, take no further action

        # get the unfiltered expression matrix
        expression_set_id_name = script_util.ws_get_obj_name(
            logger, ws_client, ws_id, params['expressionset_id'])
        em_name = expression_set_id_name + "_FPKM_ExpressionMatrix"
        logger.info("about to fetch expression matrix  {0}".format(em_name))
        try:
            #emw = ws_client.get_objects( [ { "name": em_name, "workspace": ws_id } ] )[0]
            emw = script_util.ws_get_obj(logger, ws_client, ws_id, em_name)[0]
        except:
            raise Exception(
                "unable to retrieve expression matrix object {0} from workspace {1}"
                .format(em_name, ws_id))
        emo = emw["data"]
        # filter it
        filtered_emo = rnaseq_util.filter_expr_matrix_object(
            emo, selected_gene_list)
        # save it
        logger.info("saving emo em_name {0}".format(
            params["filtered_expr_matrix"]))
        try:
            ret = ws_client.save_objects({
                'workspace':
                ws_id,
                'objects': [{
                    'type': 'KBaseFeatureValues.ExpressionMatrix',
                    'data': filtered_emo,
                    'name': params["filtered_expr_matrix"]
                }]
            })
        except:
            raise Exception("failed to save object ")
        logger.info("ws save return:\n" + pformat(ret))

        logger.info("saving volcano plot as report object")
        report_object_name = expression_set_id_name + "_plot_report"

        output_obj_ref = script_util.ws_get_ref(logger, ws_client, ws_id,
                                                output_object_name)
        em_obj_ref = script_util.ws_get_ref(logger, ws_client, ws_id,
                                            params["filtered_expr_matrix"])

        plot_report_object_name = rnaseq_util.create_and_save_volcano_plot_report(
            logger, ws_client, ws_id, self.urls['callback_url'], token,
            ballgown_output_dir, volcano_plot_file, output_obj_ref, em_obj_ref,
            report_object_name)
        #logger.info( "plot_report_object")
        #logger.info( pformat( plot_report_object_name ) )
        # THIS NEEDS TO BE AN INPUT PARAMETER IN SPEC FILE
        #iltered_expr_matrix_name = expressionset_id + "_filtered_fpkm"
        #e_em_save_obj_data = created_and_save_filtered_expr_matrix( logger,
        #                                                            ws_client,
        #                                                            ws_id,
        #                                                            token,
        #                                                            expression_set_name,
        #                                                            fold_scale_type,      #"linear", "log2+1", "log10+1"
        #                                                            alpha_cutoff,
        #                                                            q_value_cutoff,
        #                                                            log2_fold_change_cutoff,
        #                                                            maximum_num_genes,
        #                                                            filtered_expr_matrix_name
        #                                                           )
        #logger.info( "plot_report_object_name[1] is {0}".format( plot_report_object_name[1] ) )
        #logger.info( "plot_report_ref is {0}/{1}/{2}".format( plot_report_object_name[6], plot_report_object_name[0],plot_report_object_name[4] ) )
        returnVal = {
            'diff_expr_object':
            output_object_name,
            'filtered_expression_maxtrix':
            params["filtered_expr_matrix"],
            'report_name':
            plot_report_object_name[1],
            'report_ref':
            "{0}/{1}/{2}".format(plot_report_object_name[6],
                                 plot_report_object_name[0],
                                 plot_report_object_name[4]),
            'workspace':
            ws_id
        }

        self.returnVal = returnVal
Ejemplo n.º 5
0
    def runEach(self,task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
	gtf_file = task_params['gtf_file']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
		#r_sample = ws_client.get_objects(
                #                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
                r_sample = script_util.ws_get_obj(logger,ws_client, ws_id, read_sample)[0]
		#r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]	
		#sample_type = r_sample_info[2].split('-')[0]
                sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample)
                sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)
		output_name = sample_name.split('.')[0]+"_tophat_alignment"
		output_dir = os.path.join(directory,output_name)
	        #if not os.path.exists(output_dir): os.makedirs(output_dir)
            	#out_file = output_dir +"/accepted_hits.sam"
            	bowtie2_base =os.path.join(directory,handler_util.get_file_with_suffix(directory,".rev.1.bt2"))
                ### Adding advanced options to Bowtie2Call
		tophat_cmd = (' -p '+str(self.num_threads))
            	if('max_intron_length' in params and params['max_intron_length'] is not None ) : tophat_cmd += (' -I '+str(params['max_intron_length']))
            	if('min_intron_length' in params and params['min_intron_length'] is not None ): tophat_cmd += (' -i '+str(params['min_intron_length']))
            	if('min_anchor_length' in params and params['min_anchor_length'] is not None ): tophat_cmd += (' -a '+str(params['min_anchor_length']))
            	if('read_edit_dist' in params and params['read_edit_dist'] is not None ) : tophat_cmd += (' --read-edit-dist '+str(params['read_edit_dist']))
            	if('read_gap_length' in params and params['read_gap_length'] is not None) : tophat_cmd += (' --read-gap-length '+str(params['read_gap_length']))
            	if('read_mismatches' in params and params['read_mismatches'] is not None) : tophat_cmd += (' -N '+str(params['read_mismatches']))
            	if('library_type' in params and params['library_type']  is not None ) : tophat_cmd += (' --library-type ' + params['library_type'])
            	if('report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1) : tophat_cmd += ' --report-secondary-alignments'
            	if('no_coverage_search' in params and int(params['no_coverage_search']) == 1): tophat_cmd += ' --no-coverage-search'
            	if('preset_options' in params and params['preset_options'] is not None ): tophat_cmd += ' --'+params['preset_options']
                #out_file = output_dir +"/accepted_hits.sam"
                try:
                        sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
                        ds = script_util.ru_reads_download(self.logger, sample_ref,directory, token)
                except Exception,e:
                        self.logger.exception(e)
                        raise Exception( "Unable to download reads file , {0}".format(read_sample))
                if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
                        lib_type = 'SingleEnd'
                	tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir,gtf_file,bowtie2_base,ds['fwd'])
                if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                        lib_type = 'PairedEnd'
                        if sample_type == 'KBaseAssembly.PairedEndLibrary':
                            if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
                        else:
                            # TODO: the following can be read from PEL object
                            if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
                        tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir,gtf_file,bowtie2_base,ds['fwd'],ds['rev'])

#                if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
#                        lib_type = 'SingleEnd'
#                        if sample_type == 'KBaseAssembly.SingleEndLibrary':
#                            read_id = r_sample['data']['handle']['id']
#                            read_name =  r_sample['data']['handle']['file_name']
#                        else:
#                            read_id = r_sample['data']['lib']['file']['id']
#                            read_name =  r_sample['data']['lib']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=directory,token=token)
#                		tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read_name))
#                        except Exception,e:
#                                self.logger.exception(e)
#                                raise Exception( "Unable to download shock file , {0}".format(read_name))
#                if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
#                        lib_type = 'PairedEnd'
#                        if sample_type == 'KBaseAssembly.PairedEndLibrary':
#                            if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['handle_1']['id']
#                            read1_name = r_sample['data']['handle_1']['file_name']
#                            read2_id = r_sample['data']['handle_2']['id']
#                            read2_name = r_sample['data']['handle_2']['file_name']
#                        else:
#                            # TODO: the following can be read from PEL object
#                            if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['lib1']['file']['id']
#                            read1_name = r_sample['data']['lib1']['file']['file_name']
#                            read2_id = r_sample['data']['lib2']['file']['id']
#                            read2_name = r_sample['data']['lib2']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=directory,token=token)
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=directory,token=token)
#                		tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read1_name),os.path.join(directory,read2_name))
#                        except Exception,e:
#                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                try:
                        self.logger.info("Executing: tophat {0}".format(tophat_cmd))
                        cmdline_output, cmd_err = script_util.runProgram(self.logger,"tophat",tophat_cmd,None,directory)
                except Exception,e:
                        raise Exception("Failed to run command {0}\n{1}\n{2}".format(tophat_cmd,cmdline_output,cmd_err))
Ejemplo n.º 6
0
    def runEach(self,task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['bowtie2_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
                #r_sample = ws_client.get_objects(
                #                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
                r_sample = script_util.ws_get_obj(logger,ws_client, ws_id, read_sample)[0]
                #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
                #sample_type = r_sample_info[2].split('-')[0]
                sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample)
                sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)
                input_direc = os.path.join(directory,sample_name.split('.')[0]+"_bowtie2_input")
                if not os.path.exists(input_direc): os.mkdir(input_direc)
                output_name = sample_name.split('.')[0]+"_bowtie2_alignment"
                output_dir = os.path.join(directory,output_name)
                if not os.path.exists(output_dir): os.mkdir(output_dir)
                base = handler_util.get_file_with_suffix(directory,".rev.1.bt2")
                bowtie2_base =os.path.join(directory,base)
	
                ### Adding advanced options to Bowtie2Call
                bowtie2_cmd = ''
                bowtie2_cmd += ( ' -p {0}'.format(self.num_threads))
		if('quality_score' in params and params['quality_score'] is not None): bowtie2_cmd += ( ' --'+params['quality_score'])
                if('alignment_type' in params and params['alignment_type'] is not None): bowtie2_cmd += ( ' --'+params['alignment_type'] )
                if('preset_options' in params and params['preset_options'] is not None ) and ('alignment_type' in params and params['alignment_type'] is not None):
                        if (params['alignment_type'] == 'local'):
                                 bowtie2_cmd += (' --'+params['preset_options']+'-local')
                        else: bowtie2_cmd += (' --'+params['preset_options'] )
                if('trim5' in params and params['trim5'] is not None): bowtie2_cmd += ( ' --trim5 '+str(params['trim5']))
                if('trim3' in params and params['trim3'] is not None): bowtie2_cmd += ( ' --trim3 '+str(params['trim3']))
                if('np' in params and params['np'] is not None): bowtie2_cmd += ( ' --np '+str(params['np']))
                if('minins' in params and params['minins'] is not None): bowtie2_cmd += ( ' --minins '+str(params['minins']))
                if('maxins' in params and params['maxins'] is not None): bowtie2_cmd += ( ' --maxins '+str(params['maxins']))

                out_file = output_dir +"/accepted_hits.sam"
                ####
                try:
                        sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
                        ds = script_util.ru_reads_download(self.logger, sample_ref,input_direc, token)
                except Exception,e:
                        self.logger.exception(e)
                        raise Exception( "Unable to download reads file , {0}".format(read_sample))
                if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
                        lib_type = 'SingleEnd'
                        bowtie2_cmd += " -U {0} -x {1} -S {2}".format(ds['fwd'],bowtie2_base,out_file)
                if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                        lib_type = 'PairedEnd'
                        if sample_type == 'KBaseAssembly.PairedEndLibrary':
                            if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
                        else:
                            # TODO: the following can be read from PEL object
                            if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
                        hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(ds['fwd'], ds['rev'],hisat2_base,out_file)
                        bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(ds['fwd'], ds['rev'],bowtie2_base,out_file)
                ###
#                if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
#                        lib_type = 'SingleEnd'
#                        if sample_type == 'KBaseAssembly.SingleEndLibrary':
#                            read_id = r_sample['data']['handle']['id']
#                            read_name =  r_sample['data']['handle']['file_name']
#                        else:
#                            read_id = r_sample['data']['lib']['file']['id']
#                            read_name =  r_sample['data']['lib']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=input_direc,token=token)
#                                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc,read_name),bowtie2_base,out_file)
#                        except Exception,e:
#                                self.logger.exception(e)
#                                raise Exception( "Unable to download shock file , {0}".format(read_name))
#                if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
#                        lib_type = 'PairedEnd'
#                        if sample_type == 'KBaseAssembly.PairedEndLibrary':
#                            if('orientation' in params and params['orientation'] is not None): bowtie2_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['handle_1']['id']
#                            read1_name = r_sample['data']['handle_1']['file_name']
#                            read2_id = r_sample['data']['handle_2']['id']
#                            read2_name = r_sample['data']['handle_2']['file_name']
#                        else:
#                            # TODO: the following can be read from PEL object
#                            if('orientation' in params and params['orientation'] is not None): bowtie2_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['lib1']['file']['id']
#                            read1_name = r_sample['data']['lib1']['file']['file_name']
#                            read2_id = r_sample['data']['lib2']['file']['id']
#                            read2_name = r_sample['data']['lib2']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token)
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token)
#                                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(input_direc,read2_name),bowtie2_base,out_file)
#                        except Exception,e:
#                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                try:
                        self.logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
                        cmdline_output = script_util.runProgram(self.logger,"bowtie2",bowtie2_cmd,None,directory)
                except Exception,e:
                        raise Exception("Failed to run command {0}".format(bowtie2_cmd))
Ejemplo n.º 7
0
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        gtf_file = task_params['gtf_file']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
            #r_sample = ws_client.get_objects(
            #                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
            r_sample = script_util.ws_get_obj(logger, ws_client, ws_id,
                                              read_sample)[0]
            #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            #sample_type = r_sample_info[2].split('-')[0]
            sample_type = script_util.ws_get_type_name(logger, ws_client,
                                                       ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(
                self.logger, ws_client, ws_id, read_sample)
            output_name = sample_name.split('.')[0] + "_tophat_alignment"
            output_dir = os.path.join(directory, output_name)
            #if not os.path.exists(output_dir): os.makedirs(output_dir)
            #out_file = output_dir +"/accepted_hits.sam"
            bowtie2_base = os.path.join(
                directory,
                handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))
            ### Adding advanced options to Bowtie2Call
            tophat_cmd = (' -p ' + str(self.num_threads))
            if ('max_intron_length' in params
                    and params['max_intron_length'] is not None):
                tophat_cmd += (' -I ' + str(params['max_intron_length']))
            if ('min_intron_length' in params
                    and params['min_intron_length'] is not None):
                tophat_cmd += (' -i ' + str(params['min_intron_length']))
            if ('min_anchor_length' in params
                    and params['min_anchor_length'] is not None):
                tophat_cmd += (' -a ' + str(params['min_anchor_length']))
            if ('read_edit_dist' in params
                    and params['read_edit_dist'] is not None):
                tophat_cmd += (' --read-edit-dist ' +
                               str(params['read_edit_dist']))
            if ('read_gap_length' in params
                    and params['read_gap_length'] is not None):
                tophat_cmd += (' --read-gap-length ' +
                               str(params['read_gap_length']))
            if ('read_mismatches' in params
                    and params['read_mismatches'] is not None):
                tophat_cmd += (' -N ' + str(params['read_mismatches']))
            if ('library_type' in params
                    and params['library_type'] is not None):
                tophat_cmd += (' --library-type ' + params['library_type'])
            if ('report_secondary_alignments' in params
                    and int(params['report_secondary_alignments']) == 1):
                tophat_cmd += ' --report-secondary-alignments'
            if ('no_coverage_search' in params
                    and int(params['no_coverage_search']) == 1):
                tophat_cmd += ' --no-coverage-search'
            if ('preset_options' in params
                    and params['preset_options'] is not None):
                tophat_cmd += ' --' + params['preset_options']
            #out_file = output_dir +"/accepted_hits.sam"
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client,
                                                    ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref,
                                                   directory, token)
            except Exception, e:
                self.logger.exception(e)
                raise Exception(
                    "Unable to download reads file , {0}".format(read_sample))
            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
                    output_dir, gtf_file, bowtie2_base, ds['fwd'])
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        tophat_cmd += (' --' + params['orientation'])
                else:
                    # TODO: the following can be read from PEL object
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        tophat_cmd += (' --' + params['orientation'])
                tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
                    output_dir, gtf_file, bowtie2_base, ds['fwd'], ds['rev'])

#                if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
#                        lib_type = 'SingleEnd'
#                        if sample_type == 'KBaseAssembly.SingleEndLibrary':
#                            read_id = r_sample['data']['handle']['id']
#                            read_name =  r_sample['data']['handle']['file_name']
#                        else:
#                            read_id = r_sample['data']['lib']['file']['id']
#                            read_name =  r_sample['data']['lib']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=directory,token=token)
#                		tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read_name))
#                        except Exception,e:
#                                self.logger.exception(e)
#                                raise Exception( "Unable to download shock file , {0}".format(read_name))
#                if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
#                        lib_type = 'PairedEnd'
#                        if sample_type == 'KBaseAssembly.PairedEndLibrary':
#                            if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['handle_1']['id']
#                            read1_name = r_sample['data']['handle_1']['file_name']
#                            read2_id = r_sample['data']['handle_2']['id']
#                            read2_name = r_sample['data']['handle_2']['file_name']
#                        else:
#                            # TODO: the following can be read from PEL object
#                            if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['lib1']['file']['id']
#                            read1_name = r_sample['data']['lib1']['file']['file_name']
#                            read2_id = r_sample['data']['lib2']['file']['id']
#                            read2_name = r_sample['data']['lib2']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=directory,token=token)
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=directory,token=token)
#                		tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read1_name),os.path.join(directory,read2_name))
#                        except Exception,e:
#                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
            try:
                self.logger.info("Executing: tophat {0}".format(tophat_cmd))
                cmdline_output, cmd_err = script_util.runProgram(
                    self.logger, "tophat", tophat_cmd, None, directory)
            except Exception, e:
                raise Exception("Failed to run command {0}\n{1}\n{2}".format(
                    tophat_cmd, cmdline_output, cmd_err))