Esempio n. 1
0
def _CallStringtie(logger,services,ws_client,hs,ws_id,num_threads,s_alignment,gtf_file,directory,genome_id,annotation_id,sample_id,alignmentset_id,params,token):
        print "Downloading Read Sample{0}".format(s_alignment)
        alignment_name = ws_client.get_object_info([{"ref" :s_alignment}],includeMetadata=None)[0][1]
        if not logger:
                logger = handler_util.create_logger(directory,"run_Stringtie_"+alignment_name)
        try:
                alignment = ws_client.get_objects(
                                        [{ 'ref' : s_alignment }])[0]
                input_direc = os.path.join(directory,alignment_name.split('_alignment')[0]+"_stringtie_input")
                if not os.path.exists(input_direc) : os.mkdir(input_direc)
                output_name = alignment_name.split('_alignment')[0]+"_stringtie_expression"
                output_dir = os.path.join(directory,output_name)
                #Download Alignment from shock
                a_file_id = alignment['data']['file']['id']
                a_filename = alignment['data']['file']['file_name']
                condition = alignment['data']['condition']
                try:
                     script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=a_file_id,filename=a_filename,directory=input_direc,token=token)
                except Exception,e:
                        raise Exception( "Unable to download shock file, {0},{1}".format(a_filename,"".join.tracback.format_exc()))
                try:
                    input_dir = os.path.join(input_direc,alignment_name)
                    if not os.path.exists(input_dir): os.mkdir(input_dir)
                    script_util.unzip_files(logger,os.path.join(input_direc,a_filename), input_dir)
                except Exception, e:
                       logger.error("".join(traceback.format_exc()))
                       raise Exception("Unzip alignment files  error")
Esempio n. 2
0
def _CalldiffExpCallforBallgown(logger, services, ws_client, hs, ws_id,
                                num_threads, alignment_file, transcripts_gtf,
                                merged_gtf, used_tool, directory, gtf_file):
    """Build the Ballgown input tables for one sample.

    Creates ``<directory>/ballgown/<output_name>``, where ``output_name``
    is the third-from-last and second-from-last "/"-separated components
    of ``transcripts_gtf`` joined with "_", then runs the table builder
    selected by ``used_tool``:

    * ``'StringTie'`` -> ``call_stringtieBall(directory, ballgown_dir, ...)``
    * ``'Cufflinks'`` -> ``call_tablemaker(directory, output_dir, ...)``

    Any other value of ``used_tool`` runs no tool; the directories are
    still created and the tuple is still returned.

    Args:
        logger: logger to use; when falsy one is created under
            ``directory`` via ``handler_util.create_logger``.
        num_threads: forwarded to the selected tool.
        alignment_file: forwarded to the selected tool.
        transcripts_gtf: "/"-separated path with at least three components.
        merged_gtf: merged annotation forwarded to the selected tool.
        used_tool: ``'StringTie'`` or ``'Cufflinks'``.
        directory: parent directory of the ``ballgown`` directory.
        services, ws_client, hs, ws_id, gtf_file: unused; kept so existing
            callers keep working.

    Returns:
        tuple: ``(transcripts_gtf, output_dir)``.

    Raises:
        Exception: on any failure inside the step; the original exception
            and its traceback are logged before the generic one is raised.
    """
    print("Running Differential Expression steps for {0}".format(
        transcripts_gtf))
    if not logger:
        logger = handler_util.create_logger(
            directory,
            "run_diffExpCallforBallgown_" + str(hex(uuid.getnode())))
    try:
        ballgown_dir = os.path.join(directory, "ballgown")
        _make_dir_if_missing(ballgown_dir)
        print(ballgown_dir)
        print(transcripts_gtf)
        path_parts = transcripts_gtf.split("/")
        output_name = path_parts[-3] + "_" + path_parts[-2]
        output_dir = os.path.join(ballgown_dir, output_name)
        _make_dir_if_missing(output_dir)
        print(output_dir)
        if used_tool == 'StringTie':
            print("Entering StringTie")
            call_stringtieBall(directory, ballgown_dir, num_threads,
                               merged_gtf, alignment_file)
        elif used_tool == 'Cufflinks':
            print("Entering Tablemaker")
            print("Args passed to table maker :  {0}, {1} ,{2} ,{3}, {4}".format(
                directory, ballgown_dir, num_threads, merged_gtf,
                alignment_file))
            call_tablemaker(directory, output_dir, num_threads, merged_gtf,
                            alignment_file)
        # NOTE(review): this looks for t_data.ctab directly under
        # ballgown_dir, while the Cufflinks branch is handed output_dir --
        # confirm which directory the tools actually write to.
        if os.path.exists(ballgown_dir + "/t_data.ctab"):
            done_msg = (
                "Running Differential Expression for Sample {0} completed successfully"
                .format(transcripts_gtf))
            logger.info(done_msg)
            print(done_msg)
        print(transcripts_gtf + ' : ' + output_dir)
        return (transcripts_gtf, output_dir)
    except Exception as e:
        # logger.exception already appends the active traceback, so one
        # call records everything the two original calls did.
        logger.exception(e)
        raise Exception(
            "Error executing ballgown differential expression {0},{1}".format(
                transcripts_gtf, directory))


def _make_dir_if_missing(path):
    """Create ``path`` unless it is already a directory.

    Attempting the mkdir first (instead of testing ``os.path.exists``)
    avoids failing when another worker creates the same directory between
    the test and the call.
    """
    try:
        os.mkdir(path)
    except OSError:
        if not os.path.isdir(path):
            raise
Esempio n. 3
0
def _CallHisat2(logger,services,ws_client,hs,ws_id,sample_type,num_threads,read_sample,condition,directory,genome_id,sampleset_id,params,token):
        #logger.info("Downloading Read Sample{0}".format(read_sample))
        print "Downloading Read Sample{0}".format(read_sample)
        if not logger:
                logger = handler_util.create_logger(directory,"run_Hisat2_"+read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
                r_sample = ws_client.get_objects(
                                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
                r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
                sample_type = r_sample_info[2].split('-')[0]
                input_direc = os.path.join(directory,read_sample.split('.')[0]+"_hisat2_input")
                if not os.path.exists(input_direc): os.mkdir(input_direc)
                output_name = read_sample.split('.')[0]+"_hisat2_alignment"
                output_dir = os.path.join(directory,output_name)
                if not os.path.exists(output_dir): os.mkdir(output_dir)
                hisat2_base =os.path.join(directory,handler_util.get_file_with_suffix(directory,".1.ht2"))
                ### Adding advanced options to Bowtie2Call
                hisat2_cmd = ''
                hisat2_cmd += ( ' -p {0}'.format(num_threads))
                if('quality_score' in params and params['quality_score'] is not None): hisat2_cmd += ( ' --'+params['quality_score'])
                if('alignment_type' in params and params['alignment_type'] is not None): hisat2_cmd += ( ' --'+params['alignment_type'] )
                if('trim5' in params and params['trim5'] is not None): hisat2_cmd += ( ' --trim5 '+str(params['trim5']))
                if('trim3' in params and params['trim3'] is not None): hisat2_cmd += ( ' --trim3 '+str(params['trim3']))
                if('np' in params and params['np'] is not None): hisat2_cmd += ( ' --np '+str(params['np']))
                if('minins' in params and params['minins'] is not None): hisat2_cmd += ( ' --minins '+str(params['minins']))
                if('maxins' in params and params['maxins'] is not None): hisat2_cmd += ( ' --maxins '+str(params['maxins']))
                #if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
                if('min_intron_length' in params and params['min_intron_length'] is not None): hisat2_cmd += ( ' --min-intronlen '+str(params['min_intron_length']))
                if('max_intron_length' in params and params['max_intron_length'] is not None): hisat2_cmd += ( ' --max-intronlen '+str(params['max_intron_length']))
                if('no_spliced_alignment' in params and params['no_spliced_alignment'] != 0): hisat2_cmd += ( ' --no-spliced-alignment')
                if('transcriptome_mapping_only' in params and params['transcriptome_mapping_only'] != 0): hisat2_cmd += ( ' --transcriptome-mapping-only')
                if('tailor_alignments' in params and params['tailor_alignments'] is not None): 
			hisat2_cmd += ( ' --'+params['tailor_alignments'])
		out_file = output_dir +"/accepted_hits.sam"
                if sample_type  == 'KBaseAssembly.SingleEndLibrary':
                        lib_type = 'SingleEnd'
                        read_id = r_sample['data']['handle']['id']
                        read_name =  r_sample['data']['handle']['file_name']
                        try:
                                script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read_id,filename=read_name, directory=input_direc,token=token)
                                hisat2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc,read_name),hisat2_base,out_file)
                        except Exception,e:
                                #logger.exception( "Unable to download shock file , {0}".format(read_name))
                                raise Exception( "Unable to download shock file , {0}".format(read_name))
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                        lib_type = 'PairedEnd'
                	if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
                        read1_id = r_sample['data']['handle_1']['id']
                        read1_name = r_sample['data']['handle_1']['file_name']
                        read2_id = r_sample['data']['handle_2']['id']
                        read2_name = r_sample['data']['handle_2']['file_name']
                        try:
                                script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token)
                                script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token)
                                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(output_dir,read2_name),hisat2_base,out_file)
                        except Exception,e:
                                #logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
Esempio n. 4
0
    def runEach(self,task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        
        s_alignment = task_params['job_id']
        gtf_file = task_params['gtf_file']
        directory = task_params['stringtie_dir']
        genome_id = task_params['genome_id']
        annotation_id = task_params['annotation_id']
        sample_id = task_params['sample_id']
        alignmentset_id = task_params['alignmentset_id']
        ws_id = task_params['ws_id']

        print "Downloading Sample Alignment from workspace {0}".format(s_alignment)
        logger.info("Downloading Sample Alignment from workspace {0}".format(s_alignment))
        alignment_name = ws_client.get_object_info([{"ref" :s_alignment}],includeMetadata=None)[0][1]
        if not logger:
           logger = handler_util.create_logger(directory,"run_Stringtie_"+alignment_name)
        try:
           alignment = ws_client.get_objects(
                                        [{ 'ref' : s_alignment }])[0]
           input_direc = os.path.join(directory,alignment_name.split('_alignment')[0]+"_stringtie_input")
           if not os.path.exists(input_direc) : os.mkdir(input_direc)
           output_name = alignment_name.split('_alignment')[0]+"_stringtie_expression"
           output_dir = os.path.join(directory,output_name)
           #Download Alignment from shock
           a_file_id = alignment['data']['file']['id']
           a_filename = alignment['data']['file']['file_name']
           condition = alignment['data']['condition']
           try:
                script_util.download_file_from_shock(logger, shock_service_url=self.urls['shock_service_url'], shock_id=a_file_id,filename=a_filename,directory=input_direc,token=token)
           except Exception,e:
                raise Exception( "Unable to download shock file, {0},{1}".format(a_filename,"".join(traceback.format_exc())))
           try:
                input_dir = os.path.join(input_direc,alignment_name)
                if not os.path.exists(input_dir): os.mkdir(input_dir)
                script_util.unzip_files(logger,os.path.join(input_direc,a_filename), input_dir)
           except Exception, e:
                raise Exception(e)
                logger.error("".join(traceback.format_exc()))
                raise Exception("Unzip alignment files  error")
def _CalldiffExpCallforBallgown(logger, services, ws_client, hs, ws_id,
                                num_threads, alignment_file, transcripts_gtf,
                                merged_gtf, used_tool, directory, gtf_file):
    """Build the Ballgown input tables for one sample.

    Creates ``<directory>/ballgown/<output_name>``, where ``output_name``
    is the third-from-last and second-from-last "/"-separated components
    of ``transcripts_gtf`` joined with "_", then runs the table builder
    selected by ``used_tool``:

    * ``'StringTie'`` -> ``call_stringtieBall(directory, ballgown_dir, ...)``
    * ``'Cufflinks'`` -> ``call_tablemaker(directory, output_dir, ...)``

    Any other value of ``used_tool`` runs no tool; the directories are
    still created and the tuple is still returned.

    Args:
        logger: logger to use; when falsy one is created under
            ``directory`` via ``handler_util.create_logger``.
        num_threads: forwarded to the selected tool.
        alignment_file: forwarded to the selected tool.
        transcripts_gtf: "/"-separated path with at least three components.
        merged_gtf: merged annotation forwarded to the selected tool.
        used_tool: ``'StringTie'`` or ``'Cufflinks'``.
        directory: parent directory of the ``ballgown`` directory.
        services, ws_client, hs, ws_id, gtf_file: unused; kept so existing
            callers keep working.

    Returns:
        tuple: ``(transcripts_gtf, output_dir)``.

    Raises:
        Exception: on any failure inside the step; the original exception
            and its traceback are logged before the generic one is raised.
    """
    print("Running Differential Expression steps for {0}".format(transcripts_gtf))
    if not logger:
        logger = handler_util.create_logger(
            directory,
            "run_diffExpCallforBallgown_" + str(hex(uuid.getnode())))
    try:
        ballgown_dir = os.path.join(directory, "ballgown")
        _make_dir_if_missing(ballgown_dir)
        print(ballgown_dir)
        print(transcripts_gtf)
        path_parts = transcripts_gtf.split("/")
        output_name = path_parts[-3] + "_" + path_parts[-2]
        output_dir = os.path.join(ballgown_dir, output_name)
        _make_dir_if_missing(output_dir)
        print(output_dir)
        if used_tool == 'StringTie':
            print("Entering StringTie")
            call_stringtieBall(directory, ballgown_dir, num_threads,
                               merged_gtf, alignment_file)
        elif used_tool == 'Cufflinks':
            print("Entering Tablemaker")
            print("Args passed to table maker :  {0}, {1} ,{2} ,{3}, {4}".format(
                directory, ballgown_dir, num_threads, merged_gtf,
                alignment_file))
            call_tablemaker(directory, output_dir, num_threads, merged_gtf,
                            alignment_file)
        # NOTE(review): this looks for t_data.ctab directly under
        # ballgown_dir, while the Cufflinks branch is handed output_dir --
        # confirm which directory the tools actually write to.
        if os.path.exists(ballgown_dir + "/t_data.ctab"):
            done_msg = "Running Differential Expression for Sample {0} completed successfully".format(transcripts_gtf)
            logger.info(done_msg)
            print(done_msg)
        print(transcripts_gtf + ' : ' + output_dir)
        return (transcripts_gtf, output_dir)
    except Exception as e:
        # logger.exception already appends the active traceback, so one
        # call records everything the two original calls did.
        logger.exception(e)
        raise Exception(
            "Error executing ballgown differential expression {0},{1}".format(
                transcripts_gtf, directory))


def _make_dir_if_missing(path):
    """Create ``path`` unless it is already a directory.

    Attempting the mkdir first (instead of testing ``os.path.exists``)
    avoids failing when another worker creates the same directory between
    the test and the call.
    """
    try:
        os.mkdir(path)
    except OSError:
        if not os.path.isdir(path):
            raise
Esempio n. 6
0
def _CallHisat2(logger, services, ws_client, hs, ws_id, sample_type,
                num_threads, read_sample, condition, directory, genome_id,
                sampleset_id, params, token):
    #logger.info("Downloading Read Sample{0}".format(read_sample))
    print "Downloading Read Sample{0}".format(read_sample)
    if not logger:
        logger = handler_util.create_logger(directory,
                                            "run_Hisat2_" + read_sample)
    logger.info("Downloading Read Sample{0}".format(read_sample))
    try:
        r_sample = ws_client.get_objects([{
            'name': read_sample,
            'workspace': ws_id
        }])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{
                'name': read_sample,
                'workspace': ws_id
            }]})[0]
        sample_type = r_sample_info[2].split('-')[0]
        input_direc = os.path.join(directory,
                                   read_sample.split('.')[0] + "_hisat2_input")
        if not os.path.exists(input_direc): os.mkdir(input_direc)
        output_name = read_sample.split('.')[0] + "_hisat2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir): os.mkdir(output_dir)
        hisat2_base = os.path.join(
            directory, handler_util.get_file_with_suffix(directory, ".1.ht2"))
        ### Adding advanced options to Bowtie2Call
        hisat2_cmd = ''
        hisat2_cmd += (' -p {0}'.format(num_threads))
        if ('quality_score' in params and params['quality_score'] is not None):
            hisat2_cmd += (' --' + params['quality_score'])
        if ('alignment_type' in params
                and params['alignment_type'] is not None):
            hisat2_cmd += (' --' + params['alignment_type'])
        if ('trim5' in params and params['trim5'] is not None):
            hisat2_cmd += (' --trim5 ' + str(params['trim5']))
        if ('trim3' in params and params['trim3'] is not None):
            hisat2_cmd += (' --trim3 ' + str(params['trim3']))
        if ('np' in params and params['np'] is not None):
            hisat2_cmd += (' --np ' + str(params['np']))
        if ('minins' in params and params['minins'] is not None):
            hisat2_cmd += (' --minins ' + str(params['minins']))
        if ('maxins' in params and params['maxins'] is not None):
            hisat2_cmd += (' --maxins ' + str(params['maxins']))
        #if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
        if ('min_intron_length' in params
                and params['min_intron_length'] is not None):
            hisat2_cmd += (' --min-intronlen ' +
                           str(params['min_intron_length']))
        if ('max_intron_length' in params
                and params['max_intron_length'] is not None):
            hisat2_cmd += (' --max-intronlen ' +
                           str(params['max_intron_length']))
        if ('no_spliced_alignment' in params
                and params['no_spliced_alignment'] != 0):
            hisat2_cmd += (' --no-spliced-alignment')
        if ('transcriptome_mapping_only' in params
                and params['transcriptome_mapping_only'] != 0):
            hisat2_cmd += (' --transcriptome-mapping-only')
        if ('tailor_alignments' in params
                and params['tailor_alignments'] is not None):
            hisat2_cmd += (' --' + params['tailor_alignments'])
        out_file = output_dir + "/accepted_hits.sam"
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=input_direc,
                    token=token)
                hisat2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(input_direc, read_name), hisat2_base,
                    out_file)
            except Exception, e:
                #logger.exception( "Unable to download shock file , {0}".format(read_name))
                raise Exception(
                    "Unable to download shock file , {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            if ('orientation' in params and params['orientation'] is not None):
                hisat2_cmd += (' --' + params['orientation'])
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=input_direc,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=input_direc,
                    token=token)
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(input_direc, read1_name),
                    os.path.join(output_dir, read2_name), hisat2_base,
                    out_file)
            except Exception, e:
                #logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                raise Exception(
                    "Unable to download shock file , {0} or {1}".format(
                        read1_name, read2_name))
Esempio n. 7
0
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        s_alignment = task_params['job_id']
        gtf_file = task_params['gtf_file']
        directory = task_params['stringtie_dir']
        genome_id = task_params['genome_id']
        annotation_id = task_params['annotation_id']
        sample_id = task_params['sample_id']
        alignmentset_id = task_params['alignmentset_id']
        ws_id = task_params['ws_id']

        print "Downloading Sample Alignment from workspace {0}".format(
            s_alignment)
        logger.info("Downloading Sample Alignment from workspace {0}".format(
            s_alignment))
        alignment_name = ws_client.get_object_info([{
            "ref": s_alignment
        }],
                                                   includeMetadata=None)[0][1]
        if not logger:
            logger = handler_util.create_logger(
                directory, "run_Stringtie_" + alignment_name)
        try:
            alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
            input_direc = os.path.join(
                directory,
                alignment_name.split('_alignment')[0] + "_stringtie_input")
            if not os.path.exists(input_direc): os.mkdir(input_direc)
            output_name = alignment_name.split(
                '_alignment')[0] + "_stringtie_expression"
            output_dir = os.path.join(directory, output_name)
            #Download Alignment from shock
            a_file_id = alignment['data']['file']['id']
            a_filename = alignment['data']['file']['file_name']
            condition = alignment['data']['condition']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=self.urls['shock_service_url'],
                    shock_id=a_file_id,
                    filename=a_filename,
                    directory=input_direc,
                    token=token)
            except Exception, e:
                raise Exception(
                    "Unable to download shock file, {0},{1}".format(
                        a_filename, "".join(traceback.format_exc())))
            try:
                input_dir = os.path.join(input_direc, alignment_name)
                if not os.path.exists(input_dir): os.mkdir(input_dir)
                script_util.unzip_files(logger,
                                        os.path.join(input_direc, a_filename),
                                        input_dir)
            except Exception, e:
                raise Exception(e)
                logger.error("".join(traceback.format_exc()))
                raise Exception("Unzip alignment files  error")