def _CallCufflinks(logger, services, ws_client, hs, ws_id, num_threads, s_alignment,
                   gtf_file, directory, genome_id, annotation_id, sample_id,
                   alignmentset_id, params, token):
    print "Downloading Read Sample {0}".format(s_alignment)
    alignment_name = ws_client.get_object_info([{"ref": s_alignment}],
                                               includeMetadata=None)[0][1]
    if not logger:
        logger = create_logger(directory, "run_Cufflinks_" + alignment_name)
    try:
        alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
        output_name = alignment_name.split('_alignment')[0] + "_cufflinks_expression"
        output_dir = os.path.join(directory, output_name)
        # Download the alignment file from Shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=services['shock_service_url'],
                shock_id=a_file_id,
                filename=a_filename,
                directory=directory,
                token=token)
        except Exception, e:
            raise Exception("Unable to download shock file, {0}".format(a_filename))
        try:
            input_dir = os.path.join(directory, alignment_name)
            if not os.path.exists(input_dir):
                os.mkdir(input_dir)
            script_util.unzip_files(logger, os.path.join(directory, a_filename),
                                    input_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unable to unzip alignment files")
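# Illustrative sketch, not part of the service code: script_util.unzip_files is
# assumed here to extract a zip archive into a target directory. A minimal
# standard-library stand-in with that assumed behaviour looks like this:
import os
import zipfile

def unzip_files_sketch(logger, zip_path, target_dir):
    """Hypothetical stand-in for script_util.unzip_files (assumed semantics)."""
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    archive = zipfile.ZipFile(zip_path)
    try:
        archive.extractall(target_dir)
    finally:
        archive.close()
    logger.info("Extracted {0} into {1}".format(zip_path, target_dir))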
def _CallStringtie(logger, services, ws_client, hs, ws_id, num_threads, s_alignment,
                   gtf_file, directory, genome_id, annotation_id, sample_id,
                   alignmentset_id, params, token):
    print "Downloading Read Sample {0}".format(s_alignment)
    alignment_name = ws_client.get_object_info([{"ref": s_alignment}],
                                               includeMetadata=None)[0][1]
    if not logger:
        logger = handler_util.create_logger(directory, "run_Stringtie_" + alignment_name)
    try:
        alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
        input_direc = os.path.join(directory,
                                   alignment_name.split('_alignment')[0] + "_stringtie_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = alignment_name.split('_alignment')[0] + "_stringtie_expression"
        output_dir = os.path.join(directory, output_name)
        # Download the alignment file from Shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=services['shock_service_url'],
                shock_id=a_file_id,
                filename=a_filename,
                directory=input_direc,
                token=token)
        except Exception, e:
            raise Exception("Unable to download shock file, {0},{1}".format(
                a_filename, "".join(traceback.format_exc())))
        try:
            input_dir = os.path.join(input_direc, alignment_name)
            if not os.path.exists(input_dir):
                os.mkdir(input_dir)
            script_util.unzip_files(logger, os.path.join(input_direc, a_filename),
                                    input_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip alignment files error")
def runEach(self, task_params):
    ws_client = self.common_params['ws_client']
    hs = self.common_params['hs_client']
    params = self.method_params
    logger = self.logger
    token = self.common_params['user_token']
    s_alignment = task_params['job_id']
    gtf_file = task_params['gtf_file']
    directory = task_params['stringtie_dir']
    genome_id = task_params['genome_id']
    annotation_id = task_params['annotation_id']
    sample_id = task_params['sample_id']
    alignmentset_id = task_params['alignmentset_id']
    ws_id = task_params['ws_id']
    print "Downloading Sample Alignment from workspace {0}".format(s_alignment)
    alignment_name = ws_client.get_object_info([{"ref": s_alignment}],
                                               includeMetadata=None)[0][1]
    if not logger:
        logger = handler_util.create_logger(directory, "run_Stringtie_" + alignment_name)
    logger.info("Downloading Sample Alignment from workspace {0}".format(s_alignment))
    try:
        alignment = ws_client.get_objects([{'ref': s_alignment}])[0]
        input_direc = os.path.join(directory,
                                   alignment_name.split('_alignment')[0] + "_stringtie_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = alignment_name.split('_alignment')[0] + "_stringtie_expression"
        output_dir = os.path.join(directory, output_name)
        # Download the alignment file from Shock
        a_file_id = alignment['data']['file']['id']
        a_filename = alignment['data']['file']['file_name']
        condition = alignment['data']['condition']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=self.urls['shock_service_url'],
                shock_id=a_file_id,
                filename=a_filename,
                directory=input_direc,
                token=token)
        except Exception, e:
            raise Exception("Unable to download shock file, {0},{1}".format(
                a_filename, "".join(traceback.format_exc())))
        try:
            input_dir = os.path.join(input_direc, alignment_name)
            if not os.path.exists(input_dir):
                os.mkdir(input_dir)
            script_util.unzip_files(logger, os.path.join(input_direc, a_filename),
                                    input_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip alignment files error")
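# Illustrative only: the task_params dict consumed by runEach above is assumed
# to look roughly like the following; every value is a hypothetical placeholder.
example_task_params = {
    'job_id': '123/4/1',                       # workspace ref of the alignment object
    'gtf_file': '/mnt/scratch/genome_GTF_Annotation.gtf',
    'stringtie_dir': '/mnt/scratch/stringtie',
    'genome_id': '123/2/1',
    'annotation_id': '123/3/1',
    'sample_id': '123/5/1',
    'alignmentset_id': '123/6/1',
    'ws_id': 'my_workspace',
}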
def _CallHisat2(logger, services, ws_client, hs, ws_id, sample_type, num_threads,
                read_sample, condition, directory, genome_id, sampleset_id, params,
                token):
    print "Downloading Read Sample {0}".format(read_sample)
    if not logger:
        logger = handler_util.create_logger(directory, "run_Hisat2_" + read_sample)
    logger.info("Downloading Read Sample {0}".format(read_sample))
    try:
        r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        sample_type = r_sample_info[2].split('-')[0]
        input_direc = os.path.join(directory, read_sample.split('.')[0] + "_hisat2_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = read_sample.split('.')[0] + "_hisat2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        hisat2_base = os.path.join(directory,
                                   handler_util.get_file_with_suffix(directory, ".1.ht2"))
        # Adding advanced options to the HISAT2 call
        hisat2_cmd = ''
        hisat2_cmd += ' -p {0}'.format(num_threads)
        if 'quality_score' in params and params['quality_score'] is not None:
            hisat2_cmd += ' --' + params['quality_score']
        if 'alignment_type' in params and params['alignment_type'] is not None:
            hisat2_cmd += ' --' + params['alignment_type']
        if 'trim5' in params and params['trim5'] is not None:
            hisat2_cmd += ' --trim5 ' + str(params['trim5'])
        if 'trim3' in params and params['trim3'] is not None:
            hisat2_cmd += ' --trim3 ' + str(params['trim3'])
        if 'np' in params and params['np'] is not None:
            hisat2_cmd += ' --np ' + str(params['np'])
        if 'minins' in params and params['minins'] is not None:
            hisat2_cmd += ' --minins ' + str(params['minins'])
        if 'maxins' in params and params['maxins'] is not None:
            hisat2_cmd += ' --maxins ' + str(params['maxins'])
        if 'min_intron_length' in params and params['min_intron_length'] is not None:
            hisat2_cmd += ' --min-intronlen ' + str(params['min_intron_length'])
        if 'max_intron_length' in params and params['max_intron_length'] is not None:
            hisat2_cmd += ' --max-intronlen ' + str(params['max_intron_length'])
        if 'no_spliced_alignment' in params and params['no_spliced_alignment'] != 0:
            hisat2_cmd += ' --no-spliced-alignment'
        if 'transcriptome_mapping_only' in params and params['transcriptome_mapping_only'] != 0:
            hisat2_cmd += ' --transcriptome-mapping-only'
        if 'tailor_alignments' in params and params['tailor_alignments'] is not None:
            hisat2_cmd += ' --' + params['tailor_alignments']
        out_file = output_dir + "/accepted_hits.sam"
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=input_direc,
                    token=token)
                hisat2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(input_direc, read_name), hisat2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file, {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            if 'orientation' in params and params['orientation'] is not None:
                hisat2_cmd += ' --' + params['orientation']
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=input_direc,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=input_direc,
                    token=token)
                # Both mates are downloaded into input_direc, so both paths are
                # built from input_direc (the original joined read2 against output_dir).
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(input_direc, read1_name),
                    os.path.join(input_direc, read2_name), hisat2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file, {0} or {1}".format(
                    read1_name, read2_name))
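# The aligner wrappers in this file repeat the "append a flag when the
# parameter is set" check for every option. A small helper along these lines
# (an optional refactoring sketch, not existing code) would centralize that:
def append_option(cmd, params, key, flag, takes_value=True):
    """Append ' flag value' (or a bare ' flag') to cmd when params[key] is set."""
    if key in params and params[key] is not None:
        if takes_value:
            return cmd + ' {0} {1}'.format(flag, params[key])
        return cmd + ' ' + flag
    return cmd

# e.g. hisat2_cmd = append_option(hisat2_cmd, params, 'trim5', '--trim5')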
def _CallBowtie2(logger, services, ws_client, hs, ws_id, sample_type, num_threads,
                 read_sample, condition, directory, bowtie2index_id, genome_id,
                 sampleset_id, params, token):
    print "Downloading Read Sample {0}".format(read_sample)
    if not logger:
        logger = create_logger(directory, "run_Bowtie2_" + read_sample)
    logger.info("Downloading Read Sample {0}".format(read_sample))
    try:
        r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        sample_type = r_sample_info[2].split('-')[0]
        output_name = read_sample.split('.')[0] + "_bowtie2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        out_file = output_dir + "/accepted_hits.sam"
        bowtie2_base = os.path.join(directory,
                                    handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))
        # Adding advanced options to the Bowtie2 call
        bowtie2_cmd = ''
        bowtie2_cmd += ' -p {0}'.format(num_threads)
        if 'quality_score' in params and params['quality_score'] is not None:
            bowtie2_cmd += ' --' + params['quality_score']
        if 'alignment_type' in params and params['alignment_type'] is not None:
            bowtie2_cmd += ' --' + params['alignment_type']
        if ('preset_options' in params and params['preset_options'] is not None) and \
           ('alignment_type' in params and params['alignment_type'] is not None):
            if params['alignment_type'] == 'local':
                bowtie2_cmd += ' --' + params['preset_options'] + '-local'
            else:
                bowtie2_cmd += ' --' + params['preset_options']
        if 'trim5' in params and params['trim5'] is not None:
            bowtie2_cmd += ' --trim5 ' + str(params['trim5'])
        if 'trim3' in params and params['trim3'] is not None:
            bowtie2_cmd += ' --trim3 ' + str(params['trim3'])
        if 'np' in params and params['np'] is not None:
            bowtie2_cmd += ' --np ' + str(params['np'])
        if 'minins' in params and params['minins'] is not None:
            bowtie2_cmd += ' --minins ' + str(params['minins'])
        if 'maxins' in params and params['maxins'] is not None:
            bowtie2_cmd += ' --maxins ' + str(params['maxins'])
        if 'orientation' in params and params['orientation'] is not None:
            bowtie2_cmd += ' --' + params['orientation']
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=directory,
                    token=token)
                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(directory, read_name), bowtie2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file, {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=directory,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=directory,
                    token=token)
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(directory, read1_name),
                    os.path.join(directory, read2_name), bowtie2_base, out_file)
            except Exception, e:
                raise Exception("Unable to download shock file, {0} or {1}".format(
                    read1_name, read2_name))
def extract_cuffdiff_data(logger, shock_url, scratch, s_res, user_token):
    returnVal = False
    # Get the input data's Shock id and filename.
    cuffdiff_shock_id = s_res[0]['data']['file']['id']
    cuffdiff_file_name = s_res[0]['data']['file']['file_name']
    filesize = None
    dx = script_util.download_file_from_shock(logger, shock_url, cuffdiff_shock_id,
                                              cuffdiff_file_name, scratch, filesize,
                                              user_token)
    # Decompress the archive and keep it in a directory
    zip_file = join(scratch, cuffdiff_file_name)
    dstnExtractFolder1 = join(scratch, "cuffdiffData")
    dstnExtractFolder = join(dstnExtractFolder1, "cuffdiff")
    if not os.path.exists(dstnExtractFolder):
        os.makedirs(dstnExtractFolder)
    unzipStatus = script_util.unzip_files(logger, zip_file, dstnExtractFolder)
    if unzipStatus == False:
        logger.info("Problem extracting the archive")
        return returnVal
    foldersinExtractFolder = os.listdir(dstnExtractFolder)
    if len(foldersinExtractFolder) == 0:
        logger.info("Problem extracting the archive")
        return returnVal
    cuffdiff_dir = dstnExtractFolder
    return cuffdiff_dir
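# Usage sketch (hypothetical caller): extract_cuffdiff_data returns the path of
# the extracted cuffdiff directory on success and False on failure, so the
# return value must be checked before being used as a path.
def _example_extract_call(logger, shock_url, scratch, s_res, user_token):
    cuffdiff_dir = extract_cuffdiff_data(logger, shock_url, scratch, s_res, user_token)
    if cuffdiff_dir is False:
        raise Exception("Could not extract the cuffdiff archive")
    return cuffdiff_dir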
def _CallTophat(logger, services, ws_client, hs, ws_id, sample_type, num_threads,
                read_sample, gtf_file, condition, directory, bowtie2index_id,
                genome_id, sampleset_id, params, token):
    print "Downloading Read Sample {0}".format(read_sample)
    if not logger:
        logger = create_logger(directory, "run_Tophat_" + read_sample)
    try:
        r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        sample_type = r_sample_info[2].split('-')[0]
        output_name = read_sample.split('.')[0] + "_tophat_alignment"
        output_dir = os.path.join(directory, output_name)
        bowtie2_base = os.path.join(directory,
                                    handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))
        # Adding advanced options to the TopHat command
        tophat_cmd = ' -p ' + str(num_threads)
        if 'max_intron_length' in params and params['max_intron_length'] is not None:
            tophat_cmd += ' -I ' + str(params['max_intron_length'])
        if 'min_intron_length' in params and params['min_intron_length'] is not None:
            tophat_cmd += ' -i ' + str(params['min_intron_length'])
        if 'min_anchor_length' in params and params['min_anchor_length'] is not None:
            tophat_cmd += ' -a ' + str(params['min_anchor_length'])
        if 'read_edit_dist' in params and params['read_edit_dist'] is not None:
            tophat_cmd += ' --read-edit-dist ' + str(params['read_edit_dist'])
        if 'read_gap_length' in params and params['read_gap_length'] is not None:
            tophat_cmd += ' --read-gap-length ' + str(params['read_gap_length'])
        if 'read_mismatches' in params and params['read_mismatches'] is not None:
            tophat_cmd += ' -N ' + str(params['read_mismatches'])
        if 'library_type' in params and params['library_type'] is not None:
            tophat_cmd += ' --library-type ' + params['library_type']
        if 'report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1:
            tophat_cmd += ' --report-secondary-alignments'
        if 'no_coverage_search' in params and int(params['no_coverage_search']) == 1:
            tophat_cmd += ' --no-coverage-search'
        if 'preset_options' in params and params['preset_options'] is not None:
            tophat_cmd += ' --' + params['preset_options']
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=directory,
                    token=token)
                tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
                    output_dir, gtf_file, bowtie2_base,
                    os.path.join(directory, read_name))
            except Exception, e:
                raise Exception("Unable to download shock file, {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=directory,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=directory,
                    token=token)
                tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
                    output_dir, gtf_file, bowtie2_base,
                    os.path.join(directory, read1_name),
                    os.path.join(directory, read2_name))
            except Exception, e:
                raise Exception("Unable to download shock file, {0} or {1}".format(
                    read1_name, read2_name))
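# The wrappers above only assemble the tool's argument string; the excerpt does
# not show how it is executed. One plausible invocation (a sketch assuming the
# tophat/bowtie2/hisat2 binary is on PATH; the actual service may run it
# differently) would be:
import subprocess

def run_tool_sketch(logger, binary, arg_string):
    """Run 'binary arg_string' through the shell and raise on a non-zero exit."""
    cmd = '{0} {1}'.format(binary, arg_string)
    logger.info("Executing: " + cmd)
    proc = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        raise Exception("{0} failed: {1}".format(binary, stderr))
    return stdout

# e.g. run_tool_sketch(logger, 'tophat', tophat_cmd)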
raise ValueError("Please ensure you have atleast 2 expressions to run diffExpCallforBallgown in Set mode") ### Check if the gtf file exists in the workspace. if exists download the file from that annotation_id = e_sample['data']['genome_id'] logger.info("Check if the gtf file exists in the workspace".format(annotation_id)) annotation_name = ws_client.get_object_info([{"ref" :annotation_id}],includeMetadata=None)[0][1] gtf_obj_name = annotation_name+"_GTF_Annotation" ret = script_util.if_obj_exists(None,ws_client,params['ws_id'],"KBaseRNASeq.GFFAnnotation",[gtf_obj_name]) if not ret is None: logger.info("GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(annotation_name)) gtf_obj= ws_client.get_objects([{'name' : gtf_obj_name,'workspace' : params['ws_id']}])[0] gtf_info = ws_client.get_object_info_new({"objects": [{'name': gtf_obj_name, 'workspace': params['ws_id']}]})[0] gtf_annotation_id = str(gtf_info[6]) + '/' + str(gtf_info[0]) + '/' + str(gtf_info[4]) gtf_id=gtf_obj['data']['handle']['id'] gtf_name=gtf_obj['data']['handle']['file_name'] try: script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=gtf_id,filename=gtf_name, directory=diffexp_dir,token=token) gtf_file = os.path.join(diffexp_dir,gtf_name) except Exception,e: raise Exception( "Unable to download shock file, {0}".format(gtf_name)) else: fasta_file= script_util.generate_fasta(logger,services,token,annotation_id,diffexp_dir,annotation_name) logger.info("Sanitizing the fasta file to correct id names {}".format(datetime.datetime.utcnow())) mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file) c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True) logger.info("Generating FASTA file completed successfully : {}".format(datetime.datetime.utcnow())) gtf_file = script_util.create_gtf_annotation(logger,ws_client,hs,services,params['ws_id'],annotation_id,gtf_obj_name,fasta_file,diffexp_dir,token) m_expr_ids = e_sample['data']['mapped_expression_ids'] m_align_exp = [] labels = [] expressions = [] counter = 0
    ret = script_util.if_obj_exists(None, ws_client, params['ws_id'],
                                    "KBaseRNASeq.GFFAnnotation", [ws_gtf])
    print ret
    if ret is not None:
        logger.info("GFF Annotation exists for Genome Annotation {0}.... Skipping step".format(params['genome_id']))
        annot_name, annot_id = ret[0]
        gtf_obj = ws_client.get_objects([{'ref': annot_id}])[0]
        gtf_id = gtf_obj['data']['handle']['id']
        gtf_name = gtf_obj['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=services['shock_service_url'],
                shock_id=gtf_id,
                filename=gtf_name,
                directory=hisat2_dir,
                token=token)
            gtf_file = os.path.join(hisat2_dir, gtf_name)
        except Exception, e:
            raise Exception("Unable to download shock file, {0}".format(gtf_name))
    else:
        script_util.create_gtf_annotation(logger, ws_client, hs, services,
                                          params['ws_id'], annotation_id,
                                          params['genome_id'], fasta_file,
                                          hisat2_dir, token)
    # Use the number of threads provided by the user; otherwise default to 2.
    if 'num_threads' in params and params['num_threads'] is not None:
        num_threads = int(params['num_threads'])
def blast_against_genome(self, ctx, params):
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN blast_against_genome
    # TODO: Rename blast_search
    try:
        self.__LOGGER.info("Preparing FA")
        if len(params['query']) > 5:
            sequence = params['query']
        else:
            self.__LOGGER.error("The input sequence is too short!")
            raise KBaseGenomeUtilException("The input sequence is too short!")

        if not os.path.exists(self.__TEMP_DIR):
            os.makedirs(self.__TEMP_DIR)

        # Generate the input file for the query sequence
        query_fn = "%s/%s" % (self.__TEMP_DIR, self.__QUERY_FA)
        target = open(query_fn, 'w')
        if sequence.startswith(">"):
            target.write(sequence)
        else:
            seqes = sequence.split("\n")
            for i in range(len(seqes)):
                target.write(">query_seq_%d\n" % (i))
                target.write(seqes[i] + "\n")
        target.close()

        user_token = ctx['token']
        svc_token = Token(user_id=self.__SVC_USER, password=self.__SVC_PASS).token
        ws_client = Workspace(url=self.__WS_URL, token=user_token)
        err_msg = ""

        blast_dir = self.__BLAST_DIR
        if os.path.exists(blast_dir):
            files = glob.glob("%s/*" % blast_dir)
            for f in files:
                os.remove(f)
        if not os.path.exists(blast_dir):
            os.makedirs(blast_dir)

        target_fn = "%s/%s" % (blast_dir, self.__GENOME_FA)
        if 'target_seqs' in params:
            # Build the index directly from the given sequences and throw them away
            sequence = params['target_seqs']
            target = open(target_fn, 'w')
            if sequence.startswith(">"):
                target.write(sequence)
            else:
                seqes = sequence.split("\n")
                for i in range(len(seqes)):
                    target.write(">target_seq_%d\n" % (i))
                    target.write(seqes[i] + "\n")
            target.close()

            if self.__INDEX_TYPE[params['blast_program']] == 'protein_db':
                formatdb_type = 'T'
            elif self.__INDEX_TYPE[params['blast_program']] == 'transcript_db':
                formatdb_type = 'F'
            else:
                self.__LOGGER.error("{0} is not yet supported".format(params['blast_program']))
                raise KBaseGenomeUtilException("{0} is not yet supported".format(params['blast_program']))
            cmdstring = "%s -i %s -p %s -o T" % (self.__INDEX_CMD, target_fn, formatdb_type)
            tool_process = subprocess.Popen(cmdstring, stderr=subprocess.PIPE, shell=True)
            stdout, stderr = tool_process.communicate()
            if stdout is not None and len(stdout) > 0:
                self.__LOGGER.info(stdout)
            if stderr is not None and len(stderr) > 0:
                self.__LOGGER.error("Index error: " + stderr)
                raise KBaseGenomeUtilException("Index error: " + stderr)
        else:
            try:
                blast_indexes = ws_client.get_object_subset(
                    [{'name': params['blastindex_name'],
                      'workspace': params['ws_id'],
                      'included': ['handle', 'index_type']}])
            except:
                self.__LOGGER.error("Couldn't find %s:%s from the workspace"
                                    % (params['ws_id'], params['blastindex_name']))
                raise KBaseGenomeUtilException("Couldn't find %s:%s from the workspace"
                                               % (params['ws_id'], params['blastindex_name']))
            if len(blast_indexes) < 1:
                self.__LOGGER.error("Couldn't find %s:%s from the workspace"
                                    % (params['ws_id'], params['blastindex_name']))
                raise KBaseGenomeUtilException("Couldn't find %s:%s from the workspace"
                                               % (params['ws_id'], params['blastindex_name']))

            # TODO: Add error handling
            zip_fn = blast_indexes[0]['data']['handle']['file_name']
            target_fn = "%s/%s" % (blast_dir, zip_fn[:-4])  # remove '.zip'
            if self.__INDEX_TYPE[params['blast_program']] == 'protein_db':
                target_fn += '_aa.fa'
                if blast_indexes[0]['data']['index_type'] in ('none', 'nucleotide'):
                    self.__LOGGER.error("The index object does not contain amino acid sequence indexes")
                    raise KBaseGenomeUtilException(
                        "The index object does not contain amino acid sequence indexes. "
                        "This index will only work with blastn (nucleotide query, nucleotide index), "
                        "tblastn (protein query, nucleotide index) and "
                        "tblastx (nucleotide query, nucleotide index)")
            elif self.__INDEX_TYPE[params['blast_program']] == 'transcript_db':
                target_fn += '_nt.fa'
                if blast_indexes[0]['data']['index_type'] in ('none', 'protein'):
                    self.__LOGGER.error("The index object does not contain nucleotide sequence indexes")
                    raise KBaseGenomeUtilException(
                        "The index object does not contain nucleotide sequence indexes. "
                        "This index will only work with blastp (protein query, protein index) "
                        "and blastx (nucleotide query, protein index)")
            else:
                self.__LOGGER.error("{0} is not yet supported".format(params['blast_program']))
                raise KBaseGenomeUtilException("{0} is not yet supported".format(params['blast_program']))

            self.__LOGGER.info("Downloading the genome index")
            try:
                script_util.download_file_from_shock(
                    self.__LOGGER,
                    shock_service_url=blast_indexes[0]['data']['handle']['url'],
                    shock_id=blast_indexes[0]['data']['handle']['id'],
                    filename=blast_indexes[0]['data']['handle']['file_name'],
                    directory='.',
                    token=user_token)
            except Exception, e:
                self.__LOGGER.error("Downloading error from shock: Please contact [email protected]")
                raise KBaseGenomeUtilException("Downloading error from shock: Please contact [email protected]")
            try:
                script_util.unzip_files(self.__LOGGER, zip_fn, blast_dir)
            except Exception, e:
                self.__LOGGER.error("Unzip indexfile error: Please contact [email protected]")
                raise KBaseGenomeUtilException("Unzip indexfile error: Please contact [email protected]")
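# blast_against_genome writes the query and the target sequences to FASTA with
# the same inline logic twice. A factored-out version of that logic (a
# hypothetical helper, not present in the service) would look like:
def write_fasta_sketch(sequence, path, prefix):
    """Write raw or FASTA-formatted input to path, adding headers when missing."""
    out = open(path, 'w')
    try:
        if sequence.startswith(">"):
            out.write(sequence)
        else:
            for i, seq in enumerate(sequence.split("\n")):
                out.write(">%s_%d\n" % (prefix, i))
                out.write(seq + "\n")
    finally:
        out.close()

# e.g. write_fasta_sketch(params['query'], query_fn, 'query_seq')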
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        reads_type = task_params['reads_type']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        gtf_file = task_params['gtf_file']
        print "Downloading Read Sample {0}".format(read_sample)
        logger.info("Downloading Read Sample {0}".format(read_sample))
        try:
            r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
            r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            sample_type = r_sample_info[2].split('-')[0]
            output_name = read_sample.split('.')[0] + "_tophat_alignment"
            output_dir = os.path.join(directory, output_name)
            #if not os.path.exists(output_dir): os.makedirs(output_dir)
            #out_file = output_dir + "/accepted_hits.sam"
            bowtie2_base = os.path.join(directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

            ### Adding advanced options to the TopHat call
            tophat_cmd = ' -p ' + str(self.num_threads)
            if 'max_intron_length' in params and params['max_intron_length'] is not None:
                tophat_cmd += ' -I ' + str(params['max_intron_length'])
            if 'min_intron_length' in params and params['min_intron_length'] is not None:
                tophat_cmd += ' -i ' + str(params['min_intron_length'])
            if 'min_anchor_length' in params and params['min_anchor_length'] is not None:
                tophat_cmd += ' -a ' + str(params['min_anchor_length'])
            if 'read_edit_dist' in params and params['read_edit_dist'] is not None:
                tophat_cmd += ' --read-edit-dist ' + str(params['read_edit_dist'])
            if 'read_gap_length' in params and params['read_gap_length'] is not None:
                tophat_cmd += ' --read-gap-length ' + str(params['read_gap_length'])
            if 'read_mismatches' in params and params['read_mismatches'] is not None:
                tophat_cmd += ' -N ' + str(params['read_mismatches'])
            if 'library_type' in params and params['library_type'] is not None:
                tophat_cmd += ' --library-type ' + params['library_type']
            if 'report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1:
                tophat_cmd += ' --report-secondary-alignments'
            if 'no_coverage_search' in params and int(params['no_coverage_search']) == 1:
                tophat_cmd += ' --no-coverage-search'
            if 'preset_options' in params and params['preset_options'] is not None:
                tophat_cmd += ' --' + params['preset_options']

            if sample_type == 'KBaseAssembly.SingleEndLibrary':
                lib_type = 'SingleEnd'
                read_id = r_sample['data']['handle']['id']
                read_name = r_sample['data']['handle']['file_name']
                try:
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read_id, filename=read_name,
                                                         directory=directory, token=token)
                    tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir, gtf_file, bowtie2_base,
                                                                  os.path.join(directory, read_name))
                except Exception as e:
                    self.logger.exception(e)
                    raise Exception("Unable to download shock file, {0}".format(read_name))
            if sample_type == 'KBaseAssembly.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if 'orientation' in params and params['orientation'] is not None:
                    tophat_cmd += ' --' + params['orientation']
                read1_id = r_sample['data']['handle_1']['id']
                read1_name = r_sample['data']['handle_1']['file_name']
                read2_id = r_sample['data']['handle_2']['id']
                read2_name = r_sample['data']['handle_2']['file_name']
                try:
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read1_id, filename=read1_name,
                                                         directory=directory, token=token)
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read2_id, filename=read2_name,
                                                         directory=directory, token=token)
                    tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir, gtf_file, bowtie2_base,
                                                                      os.path.join(directory, read1_name),
                                                                      os.path.join(directory, read2_name))
                except Exception as e:
                    raise Exception("Unable to download shock file, {0} or {1}".format(read1_name, read2_name))
        except Exception as e:
            # Closes the outer try following the handler pattern used by the
            # sibling runEach implementations.
            logger.error("".join(traceback.format_exc()))
            raise
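# --- Illustrative sketch, not part of the service code ----------------------
# Once tophat_cmd is fully assembled above, the service hands it to its own
# process runner (not shown in this excerpt). A minimal stand-in, assuming
# the 'tophat' binary is on PATH, could look like this:
import shlex
import subprocess

def run_tophat_sketch(tophat_cmd, logger):
    cmd = 'tophat' + tophat_cmd  # tophat_cmd already starts with a space
    logger.info("Executing: {0}".format(cmd))
    proc = subprocess.Popen(shlex.split(cmd),
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise Exception("TopHat failed with exit code {0}: {1}".format(proc.returncode, err))
    return out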
    def generate_cummerbund_plots(self, ctx, cummerbundParams):
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN generate_cummerbund_plots
        params = cummerbundParams
        returnVal = params['ws_cummerbund_output']

        # Set up workspace client
        user_token = ctx['token']
        ws_client = Workspace(url=self.__WS_URL, token=user_token)

        # Read the input cuffdiff workspace object and get the file handle for the cuffdiff tar file
        s_res = ws_client.get_objects([{
            'name': params['ws_cuffdiff_id'],
            'workspace': params['workspace_name']
        }])

        # Check if the workspace returned any data
        if len(s_res) == 0:
            self.__LOGGER.info("Workspace did not return any objects")
            return returnVal

        # Get the input data's Shock id and filename
        cuffdiff_shock_id = s_res[0]['data']['file']['id']
        cuffdiff_file_name = s_res[0]['data']['file']['file_name']
        #cuffdiff_file_name = None
        filesize = None

        # Download the tar file
        dx = script_util.download_file_from_shock(self.__LOGGER, self.__SHOCK_URL,
                                                  cuffdiff_shock_id, cuffdiff_file_name,
                                                  self.__SCRATCH, filesize, user_token)

        # Decompress the tar file into its own directory
        # (renamed from 'tarfile' to avoid shadowing the stdlib module)
        tarball = join(self.__SCRATCH, cuffdiff_file_name)
        dstnExtractFolder = join(self.__SCRATCH, "cuffdiffData")
        if not os.path.exists(dstnExtractFolder):
            os.makedirs(dstnExtractFolder)

        untarStatus = script_util2.untar_files(self.__LOGGER, tarball, dstnExtractFolder)
        if untarStatus == False:
            self.__LOGGER.info("Problem extracting the archive")
            return returnVal

        foldersinExtractFolder = os.listdir(dstnExtractFolder)
        if len(foldersinExtractFolder) == 0:
            self.__LOGGER.info("Problem extracting the archive")
            return returnVal

        # Run the R scripts and update the cummerbund output json file
        cuffdiff_dir = join(dstnExtractFolder, foldersinExtractFolder[0])
        self.__LOGGER.info("Cuffdiff folder = " + cuffdiff_dir)

        # Prepare the output object and its plot list
        outputobject = dict()
        cummerbundplotset = []

        # List of plots to generate
        plotlist = [
            {'file': "dispersionplot.R",
             'title': "Dispersion plot",
             'description': "Dispersion plot"},
            {'file': "pcaplot.R",
             'title': "PCA plot",
             'description': "PCA plot"},
            {'file': "fpkmscvplot.R",
             'title': "FPKM SCV plot",
             'description': "FPKM SCV plot"}
        ]

        # Iterate through the plot list and generate the images and json files
        for plot in plotlist:
            status = script_util2.rplotandupload(self.__LOGGER, self.__SCRATCH, self.__RSCRIPTS,
                                                 plot['file'], self.__SHOCK_URL, self.__HS_URL,
                                                 user_token, cummerbundplotset, plot['title'],
                                                 plot['description'], cuffdiff_dir)
            if status == False:
                self.__LOGGER.info("Problem generating image and json file - " + plot["file"])

        # Populate the output object
        outputobject['cummerbundplotSet'] = cummerbundplotset
        #TODO: Need to figure out how to get rnaseq experiment id
        outputobject['rnaseq_experiment_id'] = "rnaseq_experiment_id"
        outputobject['cuffdiff_input_id'] = params['ws_cuffdiff_id']

        res = ws_client.save_objects({
            "workspace": params['workspace_name'],
            "objects": [{
                "type": "KBaseRNASeq.cummerbund_output",
                "data": outputobject,
                "name": params["ws_cummerbund_output"]}]
        })
        #END generate_cummerbund_plots

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method generate_cummerbund_plots return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]
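# --- Minimal sketch, assuming the contract of script_util2.untar_files ------
# The check on untarStatus above implies the helper returns True/False rather
# than raising. A standard-library stand-in with that contract (illustrative
# only; the real helper may differ) could be written as:
import tarfile

def untar_files_sketch(logger, tarball_path, dest_dir):
    try:
        archive = tarfile.open(tarball_path)
        try:
            archive.extractall(dest_dir)  # note: trusts member paths in the archive
        finally:
            archive.close()
        return True
    except Exception:
        logger.exception("Failed to extract {0}".format(tarball_path))
        return False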
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['bowtie2_dir']
        ws_id = task_params['ws_id']
        reads_type = task_params['reads_type']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        print "Downloading Read Sample {0}".format(read_sample)
        logger.info("Downloading Read Sample {0}".format(read_sample))
        try:
            r_sample = ws_client.get_objects([{'name': read_sample, 'workspace': ws_id}])[0]
            r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            sample_type = r_sample_info[2].split('-')[0]
            input_direc = os.path.join(directory, read_sample.split('.')[0] + "_bowtie2_input")
            if not os.path.exists(input_direc):
                os.mkdir(input_direc)
            output_name = read_sample.split('.')[0] + "_bowtie2_alignment"
            output_dir = os.path.join(directory, output_name)
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)
            base = handler_util.get_file_with_suffix(directory, ".rev.1.bt2")
            bowtie2_base = os.path.join(directory, base)

            ### Adding advanced options to the Bowtie2 call
            bowtie2_cmd = ''
            bowtie2_cmd += ' -p {0}'.format(self.num_threads)
            if 'quality_score' in params and params['quality_score'] is not None:
                bowtie2_cmd += ' --' + params['quality_score']
            if 'alignment_type' in params and params['alignment_type'] is not None:
                bowtie2_cmd += ' --' + params['alignment_type']
            if ('preset_options' in params and params['preset_options'] is not None) and \
               ('alignment_type' in params and params['alignment_type'] is not None):
                if params['alignment_type'] == 'local':
                    bowtie2_cmd += ' --' + params['preset_options'] + '-local'
                else:
                    bowtie2_cmd += ' --' + params['preset_options']
            if 'trim5' in params and params['trim5'] is not None:
                bowtie2_cmd += ' --trim5 ' + str(params['trim5'])
            if 'trim3' in params and params['trim3'] is not None:
                bowtie2_cmd += ' --trim3 ' + str(params['trim3'])
            if 'np' in params and params['np'] is not None:
                bowtie2_cmd += ' --np ' + str(params['np'])
            if 'minins' in params and params['minins'] is not None:
                bowtie2_cmd += ' --minins ' + str(params['minins'])
            if 'maxins' in params and params['maxins'] is not None:
                bowtie2_cmd += ' --maxins ' + str(params['maxins'])
            out_file = output_dir + "/accepted_hits.sam"
            if sample_type == 'KBaseAssembly.SingleEndLibrary':
                lib_type = 'SingleEnd'
                read_id = r_sample['data']['handle']['id']
                read_name = r_sample['data']['handle']['file_name']
                try:
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read_id, filename=read_name,
                                                         directory=input_direc, token=token)
                    bowtie2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc, read_name),
                                                                  bowtie2_base, out_file)
                except Exception as e:
                    self.logger.exception(e)
                    raise Exception("Unable to download shock file, {0}".format(read_name))
            if sample_type == 'KBaseAssembly.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if 'orientation' in params and params['orientation'] is not None:
                    bowtie2_cmd += ' --' + params['orientation']
                read1_id = r_sample['data']['handle_1']['id']
                read1_name = r_sample['data']['handle_1']['file_name']
                read2_id = r_sample['data']['handle_2']['id']
                read2_name = r_sample['data']['handle_2']['file_name']
                try:
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read1_id, filename=read1_name,
                                                         directory=input_direc, token=token)
                    script_util.download_file_from_shock(self.logger,
                                                         shock_service_url=self.urls['shock_service_url'],
                                                         shock_id=read2_id, filename=read2_name,
                                                         directory=input_direc, token=token)
                    # Both mates were downloaded to input_direc, so both -1 and -2
                    # paths must point there.
                    bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc, read1_name),
                                                                         os.path.join(input_direc, read2_name),
                                                                         bowtie2_base, out_file)
                except Exception as e:
                    raise Exception("Unable to download shock file, {0} or {1}".format(read1_name, read2_name))
        except Exception as e:
            # Closes the outer try following the handler pattern used by the
            # sibling runEach implementations.
            logger.error("".join(traceback.format_exc()))
            raise
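# --- Illustrative sketch, not part of the service code ----------------------
# handler_util.get_file_with_suffix above locates the '<base>.rev.1.bt2' file
# so the bowtie2 index base name can be recovered. A hypothetical glob-based
# equivalent (the helper's real implementation is not shown in this excerpt):
import glob
import os

def find_bowtie2_base_sketch(directory):
    suffix = '.rev.1.bt2'
    hits = glob.glob(os.path.join(directory, '*' + suffix))
    if not hits:
        raise Exception("No bowtie2 index ({0}) found in {1}".format(suffix, directory))
    # Strip the suffix to recover the index base passed to 'bowtie2 -x'.
    return hits[0][:-len(suffix)]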