Exemple #1
0
def _CallHisat2(logger,services,ws_client,hs,ws_id,sample_type,num_threads,read_sample,condition,directory,genome_id,sampleset_id,params,token):
        #logger.info("Downloading Read Sample{0}".format(read_sample))
        print "Downloading Read Sample{0}".format(read_sample)
        if not logger:
                logger = handler_util.create_logger(directory,"run_Hisat2_"+read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
                r_sample = ws_client.get_objects(
                                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
                r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
                sample_type = r_sample_info[2].split('-')[0]
                input_direc = os.path.join(directory,read_sample.split('.')[0]+"_hisat2_input")
                if not os.path.exists(input_direc): os.mkdir(input_direc)
                output_name = read_sample.split('.')[0]+"_hisat2_alignment"
                output_dir = os.path.join(directory,output_name)
                if not os.path.exists(output_dir): os.mkdir(output_dir)
                hisat2_base =os.path.join(directory,handler_util.get_file_with_suffix(directory,".1.ht2"))
                ### Adding advanced options to Bowtie2Call
                hisat2_cmd = ''
                hisat2_cmd += ( ' -p {0}'.format(num_threads))
                if('quality_score' in params and params['quality_score'] is not None): hisat2_cmd += ( ' --'+params['quality_score'])
                if('alignment_type' in params and params['alignment_type'] is not None): hisat2_cmd += ( ' --'+params['alignment_type'] )
                if('trim5' in params and params['trim5'] is not None): hisat2_cmd += ( ' --trim5 '+str(params['trim5']))
                if('trim3' in params and params['trim3'] is not None): hisat2_cmd += ( ' --trim3 '+str(params['trim3']))
                if('np' in params and params['np'] is not None): hisat2_cmd += ( ' --np '+str(params['np']))
                if('minins' in params and params['minins'] is not None): hisat2_cmd += ( ' --minins '+str(params['minins']))
                if('maxins' in params and params['maxins'] is not None): hisat2_cmd += ( ' --maxins '+str(params['maxins']))
                #if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
                if('min_intron_length' in params and params['min_intron_length'] is not None): hisat2_cmd += ( ' --min-intronlen '+str(params['min_intron_length']))
                if('max_intron_length' in params and params['max_intron_length'] is not None): hisat2_cmd += ( ' --max-intronlen '+str(params['max_intron_length']))
                if('no_spliced_alignment' in params and params['no_spliced_alignment'] != 0): hisat2_cmd += ( ' --no-spliced-alignment')
                if('transcriptome_mapping_only' in params and params['transcriptome_mapping_only'] != 0): hisat2_cmd += ( ' --transcriptome-mapping-only')
                if('tailor_alignments' in params and params['tailor_alignments'] is not None): 
			hisat2_cmd += ( ' --'+params['tailor_alignments'])
		out_file = output_dir +"/accepted_hits.sam"
                if sample_type  == 'KBaseAssembly.SingleEndLibrary':
                        lib_type = 'SingleEnd'
                        read_id = r_sample['data']['handle']['id']
                        read_name =  r_sample['data']['handle']['file_name']
                        try:
                                script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read_id,filename=read_name, directory=input_direc,token=token)
                                hisat2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc,read_name),hisat2_base,out_file)
                        except Exception,e:
                                #logger.exception( "Unable to download shock file , {0}".format(read_name))
                                raise Exception( "Unable to download shock file , {0}".format(read_name))
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                        lib_type = 'PairedEnd'
                	if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
                        read1_id = r_sample['data']['handle_1']['id']
                        read1_name = r_sample['data']['handle_1']['file_name']
                        read2_id = r_sample['data']['handle_2']['id']
                        read2_name = r_sample['data']['handle_2']['file_name']
                        try:
                                script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token)
                                script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token)
                                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(output_dir,read2_name),hisat2_base,out_file)
                        except Exception,e:
                                #logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
Exemple #2
0
def _CallBowtie2(logger,services,ws_client,hs,ws_id,sample_type,num_threads,read_sample,condition,directory,bowtie2index_id,genome_id,sampleset_id,params,token):
	#logger.info("Downloading Read Sample{0}".format(read_sample))
	print "Downloading Read Sample{0}".format(read_sample)
	if not logger:
		logger = create_logger(directory,"run_Bowtie2_"+read_sample)
	
	logger.info("Downloading Read Sample{0}".format(read_sample))
	try:
		r_sample = ws_client.get_objects(
                                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
		r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]	
		sample_type = r_sample_info[2].split('-')[0]
		output_name = read_sample.split('.')[0]+"_bowtie2_alignment"
		output_dir = os.path.join(directory,output_name)
	        if not os.path.exists(output_dir): os.mkdir(output_dir)
            	out_file = output_dir +"/accepted_hits.sam"
            	bowtie2_base =os.path.join(directory,handler_util.get_file_with_suffix(directory,".rev.1.bt2"))
            	### Adding advanced options to Bowtie2Call
            	bowtie2_cmd = ''
		bowtie2_cmd += ( ' -p {0}'.format(num_threads))
            	if('quality_score' in params and params['quality_score'] is not None): bowtie2_cmd += ( ' --'+params['quality_score'])
            	if('alignment_type' in params and params['alignment_type'] is not None): bowtie2_cmd += ( ' --'+params['alignment_type'] )
            	if('preset_options' in params and params['preset_options'] is not None ) and ('alignment_type' in params and params['alignment_type'] is not None):
                	if (params['alignment_type'] == 'local'):
                        	 bowtie2_cmd += (' --'+params['preset_options']+'-local')
                	else: bowtie2_cmd += (' --'+params['preset_options'] )
            	if('trim5' in params and params['trim5'] is not None): bowtie2_cmd += ( ' --trim5 '+str(params['trim5']))
            	if('trim3' in params and params['trim3'] is not None): bowtie2_cmd += ( ' --trim3 '+str(params['trim3']))
            	if('np' in params and params['np'] is not None): bowtie2_cmd += ( ' --np '+str(params['np']))
            	if('minins' in params and params['minins'] is not None): bowtie2_cmd += ( ' --minins '+str(params['minins']))
            	if('maxins' in params and params['maxins'] is not None): bowtie2_cmd += ( ' --maxins '+str(params['maxins']))
            	if('orientation' in params and params['orientation'] is not None): bowtie2_cmd += ( ' --'+params['orientation'])
		
		if sample_type  == 'KBaseAssembly.SingleEndLibrary':
			lib_type = 'SingleEnd'
			read_id = r_sample['data']['handle']['id']
			read_name =  r_sample['data']['handle']['file_name']
			try:
                     		script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read_id,filename=read_name, directory=directory,token=token)	
				bowtie2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(directory,read_name),bowtie2_base,out_file)
                	except Exception,e:
                        	#logger.exception( "Unable to download shock file , {0}".format(read_name))
                        	raise Exception( "Unable to download shock file , {0}".format(read_name))
		if sample_type == 'KBaseAssembly.PairedEndLibrary':
			lib_type = 'PairedEnd'
			read1_id = r_sample['data']['handle_1']['id']
			read1_name = r_sample['data']['handle_1']['file_name']
			read2_id = r_sample['data']['handle_2']['id'] 
			read2_name = r_sample['data']['handle_2']['file_name']
			try:
                                script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=directory,token=token)
                                script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=directory,token=token)
				bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(directory,read1_name),os.path.join(directory,read2_name),bowtie2_base,out_file)
			except Exception,e:
                        	#logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                        	raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
Exemple #3
0
def _CallTophat(logger,services,ws_client,hs,ws_id,sample_type,num_threads,read_sample,gtf_file,condition,directory,bowtie2index_id,genome_id,sampleset_id,params,token):
	print "Downloading Read Sample{0}".format(read_sample)
	if not logger:
		logger = create_logger(directory,"run_Tophat_"+read_sample)	
	try:
		r_sample = ws_client.get_objects(
                                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
		r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]	
		sample_type = r_sample_info[2].split('-')[0]
		output_name = read_sample.split('.')[0]+"_tophat_alignment"
		output_dir = os.path.join(directory,output_name)
	        #if not os.path.exists(output_dir): os.makedirs(output_dir)
            	#out_file = output_dir +"/accepted_hits.sam"
            	bowtie2_base =os.path.join(directory,handler_util.get_file_with_suffix(directory,".rev.1.bt2"))

            	### Adding advanced options to tophat command
		tophat_cmd = (' -p '+str(num_threads))
            	if('max_intron_length' in params and params['max_intron_length'] is not None ) : tophat_cmd += (' -I '+str(params['max_intron_length']))
            	if('min_intron_length' in params and params['min_intron_length'] is not None ): tophat_cmd += (' -i '+str(params['min_intron_length']))
            	if('min_anchor_length' in params and params['min_anchor_length'] is not None ): tophat_cmd += (' -a '+str(params['min_anchor_length']))
            	if('read_edit_dist' in params and params['read_edit_dist'] is not None ) : tophat_cmd += (' --read-edit-dist '+str(params['read_edit_dist']))
            	if('read_gap_length' in params and params['read_gap_length'] is not None) : tophat_cmd += (' --read-gap-length '+str(params['read_gap_length']))
            	if('read_mismatches' in params and params['read_mismatches'] is not None) : tophat_cmd += (' -N '+str(params['read_mismatches']))
            	if('library_type' in params and params['library_type']  is not None ) : tophat_cmd += (' --library-type ' + params['library_type'])
            	if('report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1) : tophat_cmd += ' --report-secondary-alignments'
            	if('no_coverage_search' in params and int(params['no_coverage_search']) == 1): tophat_cmd += ' --no-coverage-search'
            	if('preset_options' in params and params['preset_options'] is not None ): tophat_cmd += ' --'+params['preset_options']
		if sample_type  == 'KBaseAssembly.SingleEndLibrary':
			lib_type = 'SingleEnd'
			read_id = r_sample['data']['handle']['id']
			read_name =  r_sample['data']['handle']['file_name']
			try:
                     		script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read_id,filename=read_name, directory=directory,token=token)	
                		tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read_name))
                	except Exception,e:
                        	raise Exception( "Unable to download shock file , {0}".format(read_name))
		if sample_type == 'KBaseAssembly.PairedEndLibrary':
			lib_type = 'PairedEnd'
			read1_id = r_sample['data']['handle_1']['id']
			read1_name = r_sample['data']['handle_1']['file_name']
			read2_id = r_sample['data']['handle_2']['id'] 
			read2_name = r_sample['data']['handle_2']['file_name']
			try:
                                script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=directory,token=token)
                                script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=directory,token=token)
                		tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read1_name),os.path.join(directory,read2_name))
			except Exception,e:
                        	raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
Exemple #4
0
def _CallHisat2(logger, services, ws_client, hs, ws_id, sample_type,
                num_threads, read_sample, condition, directory, genome_id,
                sampleset_id, params, token):
    #logger.info("Downloading Read Sample{0}".format(read_sample))
    print "Downloading Read Sample{0}".format(read_sample)
    if not logger:
        logger = handler_util.create_logger(directory,
                                            "run_Hisat2_" + read_sample)
    logger.info("Downloading Read Sample{0}".format(read_sample))
    try:
        r_sample = ws_client.get_objects([{
            'name': read_sample,
            'workspace': ws_id
        }])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{
                'name': read_sample,
                'workspace': ws_id
            }]})[0]
        sample_type = r_sample_info[2].split('-')[0]
        input_direc = os.path.join(directory,
                                   read_sample.split('.')[0] + "_hisat2_input")
        if not os.path.exists(input_direc): os.mkdir(input_direc)
        output_name = read_sample.split('.')[0] + "_hisat2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir): os.mkdir(output_dir)
        hisat2_base = os.path.join(
            directory, handler_util.get_file_with_suffix(directory, ".1.ht2"))
        ### Adding advanced options to Bowtie2Call
        hisat2_cmd = ''
        hisat2_cmd += (' -p {0}'.format(num_threads))
        if ('quality_score' in params and params['quality_score'] is not None):
            hisat2_cmd += (' --' + params['quality_score'])
        if ('alignment_type' in params
                and params['alignment_type'] is not None):
            hisat2_cmd += (' --' + params['alignment_type'])
        if ('trim5' in params and params['trim5'] is not None):
            hisat2_cmd += (' --trim5 ' + str(params['trim5']))
        if ('trim3' in params and params['trim3'] is not None):
            hisat2_cmd += (' --trim3 ' + str(params['trim3']))
        if ('np' in params and params['np'] is not None):
            hisat2_cmd += (' --np ' + str(params['np']))
        if ('minins' in params and params['minins'] is not None):
            hisat2_cmd += (' --minins ' + str(params['minins']))
        if ('maxins' in params and params['maxins'] is not None):
            hisat2_cmd += (' --maxins ' + str(params['maxins']))
        #if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
        if ('min_intron_length' in params
                and params['min_intron_length'] is not None):
            hisat2_cmd += (' --min-intronlen ' +
                           str(params['min_intron_length']))
        if ('max_intron_length' in params
                and params['max_intron_length'] is not None):
            hisat2_cmd += (' --max-intronlen ' +
                           str(params['max_intron_length']))
        if ('no_spliced_alignment' in params
                and params['no_spliced_alignment'] != 0):
            hisat2_cmd += (' --no-spliced-alignment')
        if ('transcriptome_mapping_only' in params
                and params['transcriptome_mapping_only'] != 0):
            hisat2_cmd += (' --transcriptome-mapping-only')
        if ('tailor_alignments' in params
                and params['tailor_alignments'] is not None):
            hisat2_cmd += (' --' + params['tailor_alignments'])
        out_file = output_dir + "/accepted_hits.sam"
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=input_direc,
                    token=token)
                hisat2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(input_direc, read_name), hisat2_base,
                    out_file)
            except Exception, e:
                #logger.exception( "Unable to download shock file , {0}".format(read_name))
                raise Exception(
                    "Unable to download shock file , {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            if ('orientation' in params and params['orientation'] is not None):
                hisat2_cmd += (' --' + params['orientation'])
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=input_direc,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=input_direc,
                    token=token)
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(input_direc, read1_name),
                    os.path.join(output_dir, read2_name), hisat2_base,
                    out_file)
            except Exception, e:
                #logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                raise Exception(
                    "Unable to download shock file , {0} or {1}".format(
                        read1_name, read2_name))
            raise Bowtie2SampleException('Either of the Library typed objects SingleEndLibrary or PairedEndLibrary is required')
        r_label = 'Single'
	### Get the Bw index file
	
	bw_index_files = script_util.check_and_download_existing_handle_obj(logger,ws_client,self.urls,params['ws_id'],params['bowtie_index'],"KBaseRNASeq.Bowtie2Indexes",bowtie2_dir,token)
	try:
                logger.info("Unzipping Bowtie2 Indices")
                script_util.unzip_files(logger,os.path.join(bowtie2_dir,bw_index_files),bowtie2_dir)
                mv_dir= handler_util.get_dir(bowtie2_dir)
                if mv_dir is not None:
                        script_util.move_files(logger,mv_dir,bowtie2_dir)
        except Exception, e:
                logger.error("".join(traceback.format_exc()))
                raise Exception("Unzip indexfile error: Please contact [email protected]")
	### Build Index for the fasta file 
        fasta_file =os.path.join(bowtie2_dir,handler_util.get_file_with_suffix(bowtie2_dir,".fa")+".fa")
        bowtie2base =os.path.join(bowtie2_dir,handler_util.get_file_with_suffix(bowtie2_dir,".fa"))
        bowtie2base_cmd = '{0} {1}'.format(fasta_file,bowtie2base)
	try:
            logger.info("Building Index for Hisat2 {0}".format(bowtie2base_cmd))
            cmdline_output = script_util.runProgram(logger,"bowtie2-build",bowtie2base_cmd,None,bowtie2_dir)
        except Exception,e:
            raise Exception("Failed to run command {0}".format(bowtie2base_cmd))
        ### Check if GTF object exists in the workspace pull the gtf
        ref_id = bowtie_index['data']['genome_id']
        genome_name = ws_client.get_object_info_new({"objects": [{'ref' : ref_id }] })[0][1]
	ws_gtf = genome_name+"_GTF"
	gtf_file = script_util.check_and_download_existing_handle_obj(logger,ws_client,self.urls,params['ws_id'],ws_gtf,"KBaseRNASeq.GFFAnnotation",bowtie2_dir,token)
        if gtf_file is None:
             rnaseq_util.create_gtf_annotation_from_genome(logger,ws_client,hs,self.urls,params['ws_id'],ref_id,genome_name,bowtie2_dir,token)
	# Determine the num_threads provided by the user otherwise default the number of threads to 2
Exemple #6
0
                .format(",".join(missing_objs), params['ws_id']))

    ### Build Hisat2 index
    fasta_file = script_util.generate_fasta(logger, services, token,
                                            annotation_id, hisat2_dir,
                                            params['genome_id'])
    logger.info("Sanitizing the fasta file to correct id names {}".format(
        datetime.datetime.utcnow()))
    mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file)
    c_mapping.replace_fasta_contig_ids(fasta_file,
                                       mapping_filename,
                                       to_modified=True)
    logger.info("Generating FASTA file completed successfully : {}".format(
        datetime.datetime.utcnow()))
    hisat2base = os.path.join(
        hisat2_dir, handler_util.get_file_with_suffix(hisat2_dir, ".fasta"))
    hisat2base_cmd = '{0} {1}'.format(fasta_file, hisat2base)
    try:
        logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd))
        cmdline_output = script_util.runProgram(logger, "hisat2-build",
                                                hisat2base_cmd, None,
                                                hisat2_dir)
    except Exception, e:
        raise Exception("Failed to run command {0}".format(hisat2base_cmd))
    ws_gtf = params['genome_id'] + "_GTF"
    ret = script_util.if_obj_exists(None, ws_client, params['ws_id'],
                                    "KBaseRNASeq.GFFAnnotation", [ws_gtf])
    print ret
    if not ret is None:
        logger.info(
            "GFF Annotation Exist for Genome Annotation {0}.... Skipping step "
def _CallBowtie2(
    logger,
    services,
    ws_client,
    hs,
    ws_id,
    sample_type,
    num_threads,
    read_sample,
    condition,
    directory,
    bowtie2index_id,
    genome_id,
    sampleset_id,
    params,
    token,
):
    # logger.info("Downloading Read Sample{0}".format(read_sample))
    print "Downloading Read Sample{0}".format(read_sample)
    if not logger:
        logger = create_logger(directory, "run_Bowtie2_" + read_sample)

    logger.info("Downloading Read Sample{0}".format(read_sample))
    try:
        r_sample = ws_client.get_objects([{"name": read_sample, "workspace": ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new({"objects": [{"name": read_sample, "workspace": ws_id}]})[0]
        sample_type = r_sample_info[2].split("-")[0]
        output_name = read_sample.split(".")[0] + "_bowtie2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        out_file = output_dir + "/accepted_hits.sam"
        bowtie2_base = os.path.join(directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))
        ### Adding advanced options to Bowtie2Call
        bowtie2_cmd = ""
        bowtie2_cmd += " -p {0}".format(num_threads)
        if "quality_score" in params and params["quality_score"] is not None:
            bowtie2_cmd += " --" + params["quality_score"]
        if "alignment_type" in params and params["alignment_type"] is not None:
            bowtie2_cmd += " --" + params["alignment_type"]
        if ("preset_options" in params and params["preset_options"] is not None) and (
            "alignment_type" in params and params["alignment_type"] is not None
        ):
            if params["alignment_type"] == "local":
                bowtie2_cmd += " --" + params["preset_options"] + "-local"
            else:
                bowtie2_cmd += " --" + params["preset_options"]
        if "trim5" in params and params["trim5"] is not None:
            bowtie2_cmd += " --trim5 " + str(params["trim5"])
        if "trim3" in params and params["trim3"] is not None:
            bowtie2_cmd += " --trim3 " + str(params["trim3"])
        if "np" in params and params["np"] is not None:
            bowtie2_cmd += " --np " + str(params["np"])
        if "minins" in params and params["minins"] is not None:
            bowtie2_cmd += " --minins " + str(params["minins"])
        if "maxins" in params and params["maxins"] is not None:
            bowtie2_cmd += " --maxins " + str(params["maxins"])
        if "orientation" in params and params["orientation"] is not None:
            bowtie2_cmd += " --" + params["orientation"]

        if sample_type == "KBaseAssembly.SingleEndLibrary":
            lib_type = "SingleEnd"
            read_id = r_sample["data"]["handle"]["id"]
            read_name = r_sample["data"]["handle"]["file_name"]
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services["shock_service_url"],
                    shock_id=read_id,
                    filename=read_name,
                    directory=directory,
                    token=token,
                )
                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(directory, read_name), bowtie2_base, out_file
                )
            except Exception, e:
                # logger.exception( "Unable to download shock file , {0}".format(read_name))
                raise Exception("Unable to download shock file , {0}".format(read_name))
        if sample_type == "KBaseAssembly.PairedEndLibrary":
            lib_type = "PairedEnd"
            read1_id = r_sample["data"]["handle_1"]["id"]
            read1_name = r_sample["data"]["handle_1"]["file_name"]
            read2_id = r_sample["data"]["handle_2"]["id"]
            read2_name = r_sample["data"]["handle_2"]["file_name"]
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services["shock_service_url"],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=directory,
                    token=token,
                )
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services["shock_service_url"],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=directory,
                    token=token,
                )
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(directory, read1_name), os.path.join(directory, read2_name), bowtie2_base, out_file
                )
            except Exception, e:
                # logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                raise Exception("Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
Exemple #8
0
    def runEach(self,task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        reads_type = task_params['reads_type']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
	gtf_file = task_params['gtf_file']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
		r_sample = ws_client.get_objects(
                                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
		r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]	
		sample_type = r_sample_info[2].split('-')[0]
		output_name = read_sample.split('.')[0]+"_tophat_alignment"
		output_dir = os.path.join(directory,output_name)
	        #if not os.path.exists(output_dir): os.makedirs(output_dir)
            	#out_file = output_dir +"/accepted_hits.sam"
            	bowtie2_base =os.path.join(directory,handler_util.get_file_with_suffix(directory,".rev.1.bt2"))
                ### Adding advanced options to Bowtie2Call
		tophat_cmd = (' -p '+str(self.num_threads))
            	if('max_intron_length' in params and params['max_intron_length'] is not None ) : tophat_cmd += (' -I '+str(params['max_intron_length']))
            	if('min_intron_length' in params and params['min_intron_length'] is not None ): tophat_cmd += (' -i '+str(params['min_intron_length']))
            	if('min_anchor_length' in params and params['min_anchor_length'] is not None ): tophat_cmd += (' -a '+str(params['min_anchor_length']))
            	if('read_edit_dist' in params and params['read_edit_dist'] is not None ) : tophat_cmd += (' --read-edit-dist '+str(params['read_edit_dist']))
            	if('read_gap_length' in params and params['read_gap_length'] is not None) : tophat_cmd += (' --read-gap-length '+str(params['read_gap_length']))
            	if('read_mismatches' in params and params['read_mismatches'] is not None) : tophat_cmd += (' -N '+str(params['read_mismatches']))
            	if('library_type' in params and params['library_type']  is not None ) : tophat_cmd += (' --library-type ' + params['library_type'])
            	if('report_secondary_alignments' in params and int(params['report_secondary_alignments']) == 1) : tophat_cmd += ' --report-secondary-alignments'
            	if('no_coverage_search' in params and int(params['no_coverage_search']) == 1): tophat_cmd += ' --no-coverage-search'
            	if('preset_options' in params and params['preset_options'] is not None ): tophat_cmd += ' --'+params['preset_options']
                #out_file = output_dir +"/accepted_hits.sam"
                if sample_type  == 'KBaseAssembly.SingleEndLibrary':
                        lib_type = 'SingleEnd'
                        read_id = r_sample['data']['handle']['id']
                        read_name =  r_sample['data']['handle']['file_name']
                        try:
                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=directory,token=token)
                		tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read_name))
                        except Exception,e:
                                self.logger.exception(e)
                                raise Exception( "Unable to download shock file , {0}".format(read_name))
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                        lib_type = 'PairedEnd'
                        if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
                        read1_id = r_sample['data']['handle_1']['id']
                        read1_name = r_sample['data']['handle_1']['file_name']
                        read2_id = r_sample['data']['handle_2']['id']
                        read2_name = r_sample['data']['handle_2']['file_name']
                        try:
                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=directory,token=token)
                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=directory,token=token)
                		tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read1_name),os.path.join(directory,read2_name))
                        except Exception,e:
                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
Exemple #9
0
    def runEach(self,task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']
        
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['bowtie2_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
                #r_sample = ws_client.get_objects(
                #                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
                r_sample = script_util.ws_get_obj(logger,ws_client, ws_id, read_sample)[0]
                #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
                #sample_type = r_sample_info[2].split('-')[0]
                sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id, read_sample)
                sample_name = script_util.ws_get_obj_name4file(self.logger, ws_client, ws_id, read_sample)
                input_direc = os.path.join(directory,sample_name.split('.')[0]+"_bowtie2_input")
                if not os.path.exists(input_direc): os.mkdir(input_direc)
                output_name = sample_name.split('.')[0]+"_bowtie2_alignment"
                output_dir = os.path.join(directory,output_name)
                if not os.path.exists(output_dir): os.mkdir(output_dir)
                base = handler_util.get_file_with_suffix(directory,".rev.1.bt2")
                bowtie2_base =os.path.join(directory,base)
	
                ### Adding advanced options to Bowtie2Call
                bowtie2_cmd = ''
                bowtie2_cmd += ( ' -p {0}'.format(self.num_threads))
		if('quality_score' in params and params['quality_score'] is not None): bowtie2_cmd += ( ' --'+params['quality_score'])
                if('alignment_type' in params and params['alignment_type'] is not None): bowtie2_cmd += ( ' --'+params['alignment_type'] )
                if('preset_options' in params and params['preset_options'] is not None ) and ('alignment_type' in params and params['alignment_type'] is not None):
                        if (params['alignment_type'] == 'local'):
                                 bowtie2_cmd += (' --'+params['preset_options']+'-local')
                        else: bowtie2_cmd += (' --'+params['preset_options'] )
                if('trim5' in params and params['trim5'] is not None): bowtie2_cmd += ( ' --trim5 '+str(params['trim5']))
                if('trim3' in params and params['trim3'] is not None): bowtie2_cmd += ( ' --trim3 '+str(params['trim3']))
                if('np' in params and params['np'] is not None): bowtie2_cmd += ( ' --np '+str(params['np']))
                if('minins' in params and params['minins'] is not None): bowtie2_cmd += ( ' --minins '+str(params['minins']))
                if('maxins' in params and params['maxins'] is not None): bowtie2_cmd += ( ' --maxins '+str(params['maxins']))

                out_file = output_dir +"/accepted_hits.sam"
                ####
                try:
                        sample_ref = script_util.ws_get_ref(self.logger, ws_client, ws_id, read_sample)
                        ds = script_util.ru_reads_download(self.logger, sample_ref,input_direc, token)
                except Exception,e:
                        self.logger.exception(e)
                        raise Exception( "Unable to download reads file , {0}".format(read_sample))
                if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
                        lib_type = 'SingleEnd'
                        bowtie2_cmd += " -U {0} -x {1} -S {2}".format(ds['fwd'],bowtie2_base,out_file)
                if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                        lib_type = 'PairedEnd'
                        if sample_type == 'KBaseAssembly.PairedEndLibrary':
                            if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
                        else:
                            # TODO: the following can be read from PEL object
                            if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
                        hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(ds['fwd'], ds['rev'],hisat2_base,out_file)
                        bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(ds['fwd'], ds['rev'],bowtie2_base,out_file)
                ###
#                if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
#                        lib_type = 'SingleEnd'
#                        if sample_type == 'KBaseAssembly.SingleEndLibrary':
#                            read_id = r_sample['data']['handle']['id']
#                            read_name =  r_sample['data']['handle']['file_name']
#                        else:
#                            read_id = r_sample['data']['lib']['file']['id']
#                            read_name =  r_sample['data']['lib']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=input_direc,token=token)
#                                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc,read_name),bowtie2_base,out_file)
#                        except Exception,e:
#                                self.logger.exception(e)
#                                raise Exception( "Unable to download shock file , {0}".format(read_name))
#                if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
#                        lib_type = 'PairedEnd'
#                        if sample_type == 'KBaseAssembly.PairedEndLibrary':
#                            if('orientation' in params and params['orientation'] is not None): bowtie2_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['handle_1']['id']
#                            read1_name = r_sample['data']['handle_1']['file_name']
#                            read2_id = r_sample['data']['handle_2']['id']
#                            read2_name = r_sample['data']['handle_2']['file_name']
#                        else:
#                            # TODO: the following can be read from PEL object
#                            if('orientation' in params and params['orientation'] is not None): bowtie2_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['lib1']['file']['id']
#                            read1_name = r_sample['data']['lib1']['file']['file_name']
#                            read2_id = r_sample['data']['lib2']['file']['id']
#                            read2_name = r_sample['data']['lib2']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token)
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token)
#                                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(input_direc,read2_name),bowtie2_base,out_file)
#                        except Exception,e:
#                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                try:
                        self.logger.info("Executing: bowtie2 {0}".format(bowtie2_cmd))
                        cmdline_output = script_util.runProgram(self.logger,"bowtie2",bowtie2_cmd,None,directory)
                except Exception,e:
                        raise Exception("Failed to run command {0}".format(bowtie2_cmd))
def _CallTophat(
    logger,
    services,
    ws_client,
    hs,
    ws_id,
    sample_type,
    num_threads,
    read_sample,
    gtf_file,
    condition,
    directory,
    bowtie2index_id,
    genome_id,
    sampleset_id,
    params,
    token,
):
    print "Downloading Read Sample{0}".format(read_sample)
    if not logger:
        logger = create_logger(directory, "run_Tophat_" + read_sample)
    try:
        r_sample = ws_client.get_objects([{"name": read_sample, "workspace": ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new({"objects": [{"name": read_sample, "workspace": ws_id}]})[0]
        sample_type = r_sample_info[2].split("-")[0]
        output_name = read_sample.split(".")[0] + "_tophat_alignment"
        output_dir = os.path.join(directory, output_name)
        # if not os.path.exists(output_dir): os.makedirs(output_dir)
        # out_file = output_dir +"/accepted_hits.sam"
        bowtie2_base = os.path.join(directory, handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

        ### Adding advanced options to tophat command
        tophat_cmd = " -p " + str(num_threads)
        if "max_intron_length" in params and params["max_intron_length"] is not None:
            tophat_cmd += " -I " + str(params["max_intron_length"])
        if "min_intron_length" in params and params["min_intron_length"] is not None:
            tophat_cmd += " -i " + str(params["min_intron_length"])
        if "min_anchor_length" in params and params["min_anchor_length"] is not None:
            tophat_cmd += " -a " + str(params["min_anchor_length"])
        if "read_edit_dist" in params and params["read_edit_dist"] is not None:
            tophat_cmd += " --read-edit-dist " + str(params["read_edit_dist"])
        if "read_gap_length" in params and params["read_gap_length"] is not None:
            tophat_cmd += " --read-gap-length " + str(params["read_gap_length"])
        if "read_mismatches" in params and params["read_mismatches"] is not None:
            tophat_cmd += " -N " + str(params["read_mismatches"])
        if "library_type" in params and params["library_type"] is not None:
            tophat_cmd += " --library-type " + params["library_type"]
        if "report_secondary_alignments" in params and int(params["report_secondary_alignments"]) == 1:
            tophat_cmd += " --report-secondary-alignments"
        if "no_coverage_search" in params and int(params["no_coverage_search"]) == 1:
            tophat_cmd += " --no-coverage-search"
        if "preset_options" in params and params["preset_options"] is not None:
            tophat_cmd += " --" + params["preset_options"]
        if sample_type == "KBaseAssembly.SingleEndLibrary":
            lib_type = "SingleEnd"
            read_id = r_sample["data"]["handle"]["id"]
            read_name = r_sample["data"]["handle"]["file_name"]
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services["shock_service_url"],
                    shock_id=read_id,
                    filename=read_name,
                    directory=directory,
                    token=token,
                )
                tophat_cmd += " -o {0} -G {1} {2} {3}".format(
                    output_dir, gtf_file, bowtie2_base, os.path.join(directory, read_name)
                )
            except Exception, e:
                raise Exception("Unable to download shock file , {0}".format(read_name))
        if sample_type == "KBaseAssembly.PairedEndLibrary":
            lib_type = "PairedEnd"
            read1_id = r_sample["data"]["handle_1"]["id"]
            read1_name = r_sample["data"]["handle_1"]["file_name"]
            read2_id = r_sample["data"]["handle_2"]["id"]
            read2_name = r_sample["data"]["handle_2"]["file_name"]
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services["shock_service_url"],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=directory,
                    token=token,
                )
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services["shock_service_url"],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=directory,
                    token=token,
                )
                tophat_cmd += " -o {0} -G {1} {2} {3} {4}".format(
                    output_dir,
                    gtf_file,
                    bowtie2_base,
                    os.path.join(directory, read1_name),
                    os.path.join(directory, read2_name),
                )
            except Exception, e:
                raise Exception("Unable to download shock file , {0} or {1}".format(read1_name, read2_name))
Exemple #11
0
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['hisat2_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
            #r_sample = ws_client.get_objects(
            #                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
            #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            #sample_type = r_sample_info[2].split('-')[0]
            r_sample = script_util.ws_get_obj(self.logger, ws_client, ws_id,
                                              read_sample)[0]
            sample_type = script_util.ws_get_type_name(self.logger, ws_client,
                                                       ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(
                self.logger, ws_client, ws_id, read_sample)
            input_direc = os.path.join(
                directory,
                sample_name.split('.')[0] + "_hisat2_input")
            if not os.path.exists(input_direc): os.mkdir(input_direc)
            output_name = sample_name.split('.')[0] + "_hisat2_alignment"
            output_dir = os.path.join(directory, output_name)
            if not os.path.exists(output_dir): os.mkdir(output_dir)
            print directory
            base = handler_util.get_file_with_suffix(directory, ".1.ht2")
            print base
            hisat2_base = os.path.join(directory, base)
            ### Adding advanced options to Bowtie2Call
            hisat2_cmd = ''
            hisat2_cmd += (' -p {0}'.format(self.num_threads))
            if ('quality_score' in params
                    and params['quality_score'] is not None):
                hisat2_cmd += (' --' + params['quality_score'])
            if ('alignment_type' in params
                    and params['alignment_type'] is not None):
                hisat2_cmd += (' --' + params['alignment_type'])
            if ('trim5' in params and params['trim5'] is not None):
                hisat2_cmd += (' --trim5 ' + str(params['trim5']))
            if ('trim3' in params and params['trim3'] is not None):
                hisat2_cmd += (' --trim3 ' + str(params['trim3']))
            if ('np' in params and params['np'] is not None):
                hisat2_cmd += (' --np ' + str(params['np']))
            if ('minins' in params and params['minins'] is not None):
                hisat2_cmd += (' --minins ' + str(params['minins']))
            if ('maxins' in params and params['maxins'] is not None):
                hisat2_cmd += (' --maxins ' + str(params['maxins']))
            #if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
            if ('min_intron_length' in params
                    and params['min_intron_length'] is not None):
                hisat2_cmd += (' --min-intronlen ' +
                               str(params['min_intron_length']))
            if ('max_intron_length' in params
                    and params['max_intron_length'] is not None):
                hisat2_cmd += (' --max-intronlen ' +
                               str(params['max_intron_length']))
            if ('no_spliced_alignment' in params
                    and params['no_spliced_alignment'] != 0):
                hisat2_cmd += (' --no-spliced-alignment')
            if ('transcriptome_mapping_only' in params
                    and params['transcriptome_mapping_only'] != 0):
                hisat2_cmd += (' --transcriptome-mapping-only')
            if ('tailor_alignments' in params
                    and params['tailor_alignments'] is not None):
                hisat2_cmd += (' --' + params['tailor_alignments'])
            out_file = output_dir + "/accepted_hits.sam"
            ####
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client,
                                                    ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref,
                                                   input_direc, token)
                self.logger.info(ds)
            except Exception, e:
                self.logger.exception(e)
                raise Exception(
                    "Unable to download reads file , {0}".format(read_sample))
            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                hisat2_cmd += " -U {0} -x {1} -S {2}".format(
                    ds['fwd'], hisat2_base, out_file)
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        hisat2_cmd += (' --' + params['orientation'])
                else:
                    # TODO: the following can be read from PEL object
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        hisat2_cmd += (' --' + params['orientation'])
                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    ds['fwd'], ds['rev'], hisat2_base, out_file)
            #if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
            #        lib_type = 'SingleEnd'
            #        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            #            read_id = r_sample['data']['handle']['id']
            #            read_name =  r_sample['data']['handle']['file_name']
            #        else:
            #            read_id = r_sample['data']['lib']['file']['id']
            #            read_name =  r_sample['data']['lib']['file']['file_name']
            #        try:
            #                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=input_direc,token=token)
            #                hisat2_cmd += " -U {0} -x {1} -S {2}".format(os.path.join(input_direc,read_name),hisat2_base,out_file)
            #        except Exception,e:
            #                self.logger.exception(e)
            #                raise Exception( "Unable to download shock file , {0}".format(read_name))
            #if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
            #        lib_type = 'PairedEnd'
            #        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            #            if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
            #            read1_id = r_sample['data']['handle_1']['id']
            #            read1_name = r_sample['data']['handle_1']['file_name']
            #            read2_id = r_sample['data']['handle_2']['id']
            #            read2_name = r_sample['data']['handle_2']['file_name']
            #        else:
            #            # TODO: the following can be read from PEL object
            #            if('orientation' in params and params['orientation'] is not None): hisat2_cmd += ( ' --'+params['orientation'])
            #            read1_id = r_sample['data']['lib1']['file']['id']
            #            read1_name = r_sample['data']['lib1']['file']['file_name']
            #            read2_id = r_sample['data']['lib2']['file']['id']
            #            read2_name = r_sample['data']['lib2']['file']['file_name']
            #        try:
            #                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=input_direc,token=token)
            #                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=input_direc,token=token)
            #                hisat2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(os.path.join(input_direc,read1_name),os.path.join(input_direc,read2_name),hisat2_base,out_file)
            #        except Exception,e:
            #                logger.exception(e)
            #                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
            try:
                self.logger.info("Executing: hisat2 {0}".format(hisat2_cmd))
                cmdline_output = script_util.runProgram(
                    self.logger, "hisat2", hisat2_cmd, None, directory)
            except Exception, e:
                logger.exception(e)
                raise Exception("Failed to run command {0}".format(hisat2_cmd))
Exemple #12
0
def _CallBowtie2(logger, services, ws_client, hs, ws_id, sample_type,
                 num_threads, read_sample, condition, directory,
                 bowtie2index_id, genome_id, sampleset_id, params, token):
    #logger.info("Downloading Read Sample{0}".format(read_sample))
    print "Downloading Read Sample{0}".format(read_sample)
    if not logger:
        logger = create_logger(directory, "run_Bowtie2_" + read_sample)

    logger.info("Downloading Read Sample{0}".format(read_sample))
    try:
        r_sample = ws_client.get_objects([{
            'name': read_sample,
            'workspace': ws_id
        }])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{
                'name': read_sample,
                'workspace': ws_id
            }]})[0]
        sample_type = r_sample_info[2].split('-')[0]
        output_name = read_sample.split('.')[0] + "_bowtie2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir): os.mkdir(output_dir)
        out_file = output_dir + "/accepted_hits.sam"
        bowtie2_base = os.path.join(
            directory,
            handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))
        ### Adding advanced options to Bowtie2Call
        bowtie2_cmd = ''
        bowtie2_cmd += (' -p {0}'.format(num_threads))
        if ('quality_score' in params and params['quality_score'] is not None):
            bowtie2_cmd += (' --' + params['quality_score'])
        if ('alignment_type' in params
                and params['alignment_type'] is not None):
            bowtie2_cmd += (' --' + params['alignment_type'])
        if ('preset_options' in params and params['preset_options']
                is not None) and ('alignment_type' in params
                                  and params['alignment_type'] is not None):
            if (params['alignment_type'] == 'local'):
                bowtie2_cmd += (' --' + params['preset_options'] + '-local')
            else:
                bowtie2_cmd += (' --' + params['preset_options'])
        if ('trim5' in params and params['trim5'] is not None):
            bowtie2_cmd += (' --trim5 ' + str(params['trim5']))
        if ('trim3' in params and params['trim3'] is not None):
            bowtie2_cmd += (' --trim3 ' + str(params['trim3']))
        if ('np' in params and params['np'] is not None):
            bowtie2_cmd += (' --np ' + str(params['np']))
        if ('minins' in params and params['minins'] is not None):
            bowtie2_cmd += (' --minins ' + str(params['minins']))
        if ('maxins' in params and params['maxins'] is not None):
            bowtie2_cmd += (' --maxins ' + str(params['maxins']))
        if ('orientation' in params and params['orientation'] is not None):
            bowtie2_cmd += (' --' + params['orientation'])

        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read_id,
                    filename=read_name,
                    directory=directory,
                    token=token)
                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(directory, read_name), bowtie2_base, out_file)
            except Exception, e:
                #logger.exception( "Unable to download shock file , {0}".format(read_name))
                raise Exception(
                    "Unable to download shock file , {0}".format(read_name))
        if sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read1_id,
                    filename=read1_name,
                    directory=directory,
                    token=token)
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=services['shock_service_url'],
                    shock_id=read2_id,
                    filename=read2_name,
                    directory=directory,
                    token=token)
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(directory, read1_name),
                    os.path.join(directory, read2_name), bowtie2_base,
                    out_file)
            except Exception, e:
                #logger.Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
                raise Exception(
                    "Unable to download shock file , {0} or {1}".format(
                        read1_name, read2_name))
Exemple #13
0
    def runEach(self, task_params):
        """Download one reads sample and run ``tophat`` on it.

        The workspace object named by ``task_params['job_id']`` is looked up,
        its reads are downloaded into ``task_params['tophat_dir']``, the
        command-line options are assembled from ``self.method_params`` and the
        program is started through ``script_util.runProgram``.

        NOTE(review): in this excerpt the body sat inside an outer ``try:``
        whose handler was not part of the snippet (a syntax error on its
        own). That dangling ``try:`` is dropped here, so every failure simply
        propagates to the caller.

        Args:
            task_params: mapping providing ``job_id``, ``label``,
                ``tophat_dir``, ``ws_id``, ``annotation_id``,
                ``sampleset_id`` and ``gtf_file``.

        Raises:
            Exception: if the reads cannot be downloaded or if running
                ``tophat`` fails.
        """
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        # All keys are read up front so that a missing entry fails before any
        # download starts; several of these values are not used further in
        # the visible part of the method.
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        gtf_file = task_params['gtf_file']

        print("Downloading Read Sample{0}".format(read_sample))
        logger.info("Downloading Read Sample{0}".format(read_sample))

        r_sample = script_util.ws_get_obj(logger, ws_client, ws_id,
                                          read_sample)[0]
        sample_type = script_util.ws_get_type_name(logger, ws_client, ws_id,
                                                   read_sample)
        sample_name = script_util.ws_get_obj_name4file(
            self.logger, ws_client, ws_id, read_sample)
        # Everything after the first '.' of the sample name is discarded.
        output_name = sample_name.split('.')[0] + "_tophat_alignment"
        output_dir = os.path.join(directory, output_name)
        bowtie2_base = os.path.join(
            directory,
            handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

        ### Adding advanced options to the tophat command
        tophat_cmd = ' -p ' + str(self.num_threads)
        # (parameter name, command-line flag) pairs whose value is appended
        # after str() conversion when the parameter is present and not None.
        valued_options = (
            ('max_intron_length', ' -I '),
            ('min_intron_length', ' -i '),
            ('min_anchor_length', ' -a '),
            ('read_edit_dist', ' --read-edit-dist '),
            ('read_gap_length', ' --read-gap-length '),
            ('read_mismatches', ' -N '),
        )
        for key, flag in valued_options:
            if key in params and params[key] is not None:
                tophat_cmd += flag + str(params[key])
        if 'library_type' in params and params['library_type'] is not None:
            tophat_cmd += ' --library-type ' + params['library_type']
        # The two switches below are enabled only when their value equals 1.
        if ('report_secondary_alignments' in params
                and int(params['report_secondary_alignments']) == 1):
            tophat_cmd += ' --report-secondary-alignments'
        if ('no_coverage_search' in params
                and int(params['no_coverage_search']) == 1):
            tophat_cmd += ' --no-coverage-search'
        if ('preset_options' in params
                and params['preset_options'] is not None):
            tophat_cmd += ' --' + params['preset_options']

        try:
            sample_ref = script_util.ws_get_ref(self.logger, ws_client,
                                                ws_id, read_sample)
            ds = script_util.ru_reads_download(self.logger, sample_ref,
                                               directory, token)
        except Exception as e:
            self.logger.exception(e)
            raise Exception(
                "Unable to download reads file , {0}".format(read_sample))

        single_end_types = ('KBaseAssembly.SingleEndLibrary',
                            'KBaseFile.SingleEndLibrary')
        paired_end_types = ('KBaseAssembly.PairedEndLibrary',
                            'KBaseFile.PairedEndLibrary')
        if sample_type in single_end_types:
            lib_type = 'SingleEnd'
            tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
                output_dir, gtf_file, bowtie2_base, ds['fwd'])
        elif sample_type in paired_end_types:
            lib_type = 'PairedEnd'
            # Both paired-end object types take the orientation from params.
            # TODO: for KBaseFile.PairedEndLibrary it can be read from the
            # PEL object instead.
            if 'orientation' in params and params['orientation'] is not None:
                tophat_cmd += ' --' + params['orientation']
            tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
                output_dir, gtf_file, bowtie2_base, ds['fwd'], ds['rev'])

        # Pre-bind the outputs: when runProgram raises they are never
        # assigned, and formatting the error message below would otherwise
        # fail with a NameError that hides the real problem.
        cmdline_output, cmd_err = None, None
        try:
            self.logger.info("Executing: tophat {0}".format(tophat_cmd))
            cmdline_output, cmd_err = script_util.runProgram(
                self.logger, "tophat", tophat_cmd, None, directory)
        except Exception as e:
            self.logger.exception(e)
            raise Exception("Failed to run command {0}\n{1}\n{2}".format(
                tophat_cmd, cmdline_output, cmd_err))
Exemple #14
0
    def runEach(self, task_params):
        """Download one reads sample and assemble its ``bowtie2`` command.

        The workspace object named by ``task_params['job_id']`` is fetched,
        per-sample input and output directories are created below
        ``task_params['bowtie2_dir']``, the read file(s) are downloaded from
        Shock into the input directory and the bowtie2 option string is built
        from ``self.method_params``.

        NOTE(review): the excerpt stops once the command string is assembled;
        nothing in the visible code executes it. The body also sat inside an
        outer ``try:`` whose handler was not part of the snippet (a syntax
        error on its own); that dangling ``try:`` is dropped here, so every
        failure propagates to the caller.

        Args:
            task_params: mapping providing ``job_id``, ``label``,
                ``bowtie2_dir``, ``ws_id``, ``reads_type``,
                ``annotation_id`` and ``sampleset_id``.

        Raises:
            Exception: if a read file cannot be downloaded from Shock.
        """
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        # All keys are read up front so that a missing entry fails before any
        # download starts; several of these values are not used further in
        # the visible part of the method.
        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['bowtie2_dir']
        ws_id = task_params['ws_id']
        reads_type = task_params['reads_type']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']

        print("Downloading Read Sample{0}".format(read_sample))
        logger.info("Downloading Read Sample{0}".format(read_sample))

        r_sample = ws_client.get_objects(
            [{'name': read_sample, 'workspace': ws_id}])[0]
        r_sample_info = ws_client.get_object_info_new(
            {"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
        # Field 2 of the info record is the type string; whatever follows the
        # first '-' (presumably the type version) is dropped.
        sample_type = r_sample_info[2].split('-')[0]

        sample_prefix = read_sample.split('.')[0]
        input_direc = os.path.join(directory,
                                   sample_prefix + "_bowtie2_input")
        if not os.path.exists(input_direc):
            os.mkdir(input_direc)
        output_name = sample_prefix + "_bowtie2_alignment"
        output_dir = os.path.join(directory, output_name)
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        base = handler_util.get_file_with_suffix(directory, ".rev.1.bt2")
        bowtie2_base = os.path.join(directory, base)

        ### Adding advanced options to Bowtie2Call
        bowtie2_cmd = ' -p {0}'.format(self.num_threads)
        if 'quality_score' in params and params['quality_score'] is not None:
            bowtie2_cmd += ' --' + params['quality_score']
        has_alignment_type = ('alignment_type' in params
                              and params['alignment_type'] is not None)
        if has_alignment_type:
            bowtie2_cmd += ' --' + params['alignment_type']
        # A preset is only applied together with an alignment type; in local
        # mode the preset name gets the '-local' suffix.
        if ('preset_options' in params
                and params['preset_options'] is not None
                and has_alignment_type):
            if params['alignment_type'] == 'local':
                bowtie2_cmd += ' --' + params['preset_options'] + '-local'
            else:
                bowtie2_cmd += ' --' + params['preset_options']
        for key in ('trim5', 'trim3', 'np', 'minins', 'maxins'):
            if key in params and params[key] is not None:
                bowtie2_cmd += ' --' + key + ' ' + str(params[key])

        out_file = output_dir + "/accepted_hits.sam"
        shock_url = self.urls['shock_service_url']
        if sample_type == 'KBaseAssembly.SingleEndLibrary':
            lib_type = 'SingleEnd'
            read_id = r_sample['data']['handle']['id']
            read_name = r_sample['data']['handle']['file_name']
            try:
                script_util.download_file_from_shock(
                    self.logger,
                    shock_service_url=shock_url,
                    shock_id=read_id,
                    filename=read_name,
                    directory=input_direc,
                    token=token)
                bowtie2_cmd += " -U {0} -x {1} -S {2}".format(
                    os.path.join(input_direc, read_name), bowtie2_base,
                    out_file)
            except Exception as e:
                self.logger.exception(e)
                raise Exception(
                    "Unable to download shock file , {0}".format(read_name))
        elif sample_type == 'KBaseAssembly.PairedEndLibrary':
            lib_type = 'PairedEnd'
            if 'orientation' in params and params['orientation'] is not None:
                bowtie2_cmd += ' --' + params['orientation']
            read1_id = r_sample['data']['handle_1']['id']
            read1_name = r_sample['data']['handle_1']['file_name']
            read2_id = r_sample['data']['handle_2']['id']
            read2_name = r_sample['data']['handle_2']['file_name']
            try:
                for shock_id, file_name in ((read1_id, read1_name),
                                            (read2_id, read2_name)):
                    script_util.download_file_from_shock(
                        self.logger,
                        shock_service_url=shock_url,
                        shock_id=shock_id,
                        filename=file_name,
                        directory=input_direc,
                        token=token)
                # Both mates were downloaded into input_direc, so both paths
                # must point there (read 2 used to point at output_dir).
                bowtie2_cmd += " -1 {0} -2 {1} -x {2} -S {3}".format(
                    os.path.join(input_direc, read1_name),
                    os.path.join(input_direc, read2_name),
                    bowtie2_base, out_file)
            except Exception as e:
                self.logger.exception(e)
                raise Exception(
                    "Unable to download shock file , {0} or {1}".format(
                        read1_name, read2_name))
Exemple #15
0
        	reads = sample['data']['sample_ids']
        	reads_type= sample['data']['Library_type']
        	if reads_type == 'PairedEnd': r_type = 'KBaseAssembly.PairedEndLibrary'
        	else: r_type = 'KBaseAssembly.SingleEndLibrary'
        	e_ws_objs = script_util.if_ws_obj_exists(None,ws_client,params['ws_id'],r_type,reads)
        	missing_objs = [i for i in reads if not i in e_ws_objs]
        	if len(e_ws_objs) != len(reads):
            		raise Exception('Missing Library objects {0} in the {1}. please copy them and run this method'.format(",".join(missing_objs),params['ws_id']))

	### Build Hisat2 index
	fasta_file = script_util.generate_fasta(logger,services,token,annotation_id,hisat2_dir,params['genome_id'])
        logger.info("Sanitizing the fasta file to correct id names {}".format(datetime.datetime.utcnow()))
        mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file)
        c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True)
        logger.info("Generating FASTA file completed successfully : {}".format(datetime.datetime.utcnow()))
        hisat2base =os.path.join(hisat2_dir,handler_util.get_file_with_suffix(hisat2_dir,".fasta"))
        hisat2base_cmd = '{0} {1}'.format(fasta_file,hisat2base)
	try:
            logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd))
            cmdline_output = script_util.runProgram(logger,"hisat2-build",hisat2base_cmd,None,hisat2_dir)
        except Exception,e:
            raise Exception("Failed to run command {0}".format(hisat2base_cmd))
        ws_gtf = params['genome_id']+"_GTF"
        ret = script_util.if_obj_exists(None,ws_client,params['ws_id'],"KBaseRNASeq.GFFAnnotation",[ws_gtf])
        print ret
        if not ret is None:
            logger.info("GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(params['genome_id']))
            annot_name,annot_id = ret[0]
            gtf_obj=ws_client.get_objects([{'ref' : annot_id}])[0]
            gtf_id=gtf_obj['data']['handle']['id']
            gtf_name=gtf_obj['data']['handle']['file_name']
        bw_name =  bowtie_index['data']['handle']['file_name']
        genome_id = bowtie_index['data']['genome_id']
        annotation_gtf = ws_client.get_object_info([{"ref" :genome_id}],includeMetadata=None)[0][1]
        shared_files={}
        shared_files[bw_name] = bw_id
        script_util.download_shock_files(logger,self.urls['shock_service_url'],tophat_dir,shared_files,token)
        try:
            logger.info("Unzipping Bowtie2 Indices")
            script_util.unzip_files(logger,os.path.join(tophat_dir,bw_name),tophat_dir)
            mv_dir= handler_util.get_dir(tophat_dir)
            if mv_dir is not None:
                    script_util.move_files(logger,mv_dir,tophat_dir)
        except Exception, e:
               logger.error("".join(traceback.format_exc()))
               raise Exception("Unzip indexfile error")
        fasta_file =os.path.join(tophat_dir,(handler_util.get_file_with_suffix(tophat_dir,".fa")+".fa"))
        bowtie2base =os.path.join(tophat_dir,handler_util.get_file_with_suffix(tophat_dir,".rev.1.bt2"))

	### Check if GTF annotation object exist or skip this step
	### Check if the gtf object exists in the workspace
        ### Only run create_gtf_annotation if object doesnt exist
	ws_gtf = annotation_gtf+"_GTF_Annotation"

        genome_name = script_util.ws_get_obj_name( logger, ws_client, params['ws_id'], genome_id )
        gtf_file = script_util.check_and_download_existing_handle_obj(logger,ws_client,self.urls,params['ws_id'],ws_gtf,"KBaseRNASeq.GFFAnnotation",tophat_dir,token)
        if gtf_file is None:
            gtf_file = rnaseq_util.create_gtf_annotation_from_genome(logger,ws_client,hs,self.urls,params['ws_id'],genome_id,genome_name,tophat_dir,token)
	#ret = script_util.if_obj_exists(None,ws_client,params['ws_id'],"KBaseRNASeq.GFFAnnotation",[ws_gtf]) # this line should be safe from reference
        #if not ret is None:
        #    logger.info("GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(annotation_gtf))
	#    annot_name,annot_id = ret[0]
Exemple #17
0
        script_util.download_shock_files(logger,
                                         self.urls['shock_service_url'],
                                         tophat_dir, shared_files, token)
        try:
            logger.info("Unzipping Bowtie2 Indices")
            script_util.unzip_files(logger, os.path.join(tophat_dir, bw_name),
                                    tophat_dir)
            mv_dir = handler_util.get_dir(tophat_dir)
            if mv_dir is not None:
                script_util.move_files(logger, mv_dir, tophat_dir)
        except Exception, e:
            logger.error("".join(traceback.format_exc()))
            raise Exception("Unzip indexfile error")
        fasta_file = os.path.join(
            tophat_dir,
            (handler_util.get_file_with_suffix(tophat_dir, ".fa") + ".fa"))
        bowtie2base = os.path.join(
            tophat_dir,
            handler_util.get_file_with_suffix(tophat_dir, ".rev.1.bt2"))

        ### Check if GTF annotation object exist or skip this step
        ### Check if the gtf object exists in the workspace
        ### Only run create_gtf_annotation if object doesnt exist
        ws_gtf = annotation_gtf + "_GTF_Annotation"
        gtf_file = script_util.check_and_download_existing_handle_obj(
            logger, ws_client, self.urls, params['ws_id'], ws_gtf,
            "KBaseRNASeq.GFFAnnotation", tophat_dir, token)
        if gtf_file is None:
            gtf_file = rnaseq_util.create_gtf_annotation_from_genome(
                logger, ws_client, hs, self.urls, params['ws_id'], ref_id,
                genome_name, tophat_dir, token)
Exemple #18
0
def _CallTophat(logger, services, ws_client, hs, ws_id, sample_type,
                num_threads, read_sample, gtf_file, condition, directory,
                bowtie2index_id, genome_id, sampleset_id, params, token):
    """Download one reads sample from Shock and assemble its tophat command.

    The workspace object ``read_sample`` is fetched from ``ws_id``, its read
    file(s) are downloaded into ``directory`` and the ``tophat`` option
    string is built from ``params``.

    NOTE(review): the excerpt stops once the command string is assembled;
    nothing in the visible code executes it. The body also sat inside an
    outer ``try:`` whose handler was not part of the snippet (a syntax error
    on its own); that dangling ``try:`` is dropped here, so every failure
    propagates to the caller.

    Args:
        logger: logger to use; when falsy a new one is created for this
            sample.
        services: mapping that provides ``shock_service_url``.
        ws_client: workspace client used to fetch the reads object.
        ws_id: workspace holding ``read_sample``.
        sample_type: ignored; the type is re-read from the workspace.
        num_threads: value passed to tophat's ``-p`` option.
        read_sample: name of the reads object.
        gtf_file: annotation file passed to tophat's ``-G`` option.
        directory: working directory holding the bowtie2 index; the reads
            are downloaded here too.
        params: mapping of optional tophat settings.
        token: authentication token for the Shock download.
        hs, condition, bowtie2index_id, genome_id, sampleset_id: not used in
            the visible part of the function.

    Raises:
        Exception: if a read file cannot be downloaded from Shock.
    """
    print("Downloading Read Sample{0}".format(read_sample))
    if not logger:
        # NOTE(review): create_logger is called unqualified here; confirm it
        # is imported at file level.
        logger = create_logger(directory, "run_Tophat_" + read_sample)

    r_sample = ws_client.get_objects([{
        'name': read_sample,
        'workspace': ws_id
    }])[0]
    r_sample_info = ws_client.get_object_info_new(
        {"objects": [{
            'name': read_sample,
            'workspace': ws_id
        }]})[0]
    # Field 2 of the info record is the type string; whatever follows the
    # first '-' (presumably the type version) is dropped.
    sample_type = r_sample_info[2].split('-')[0]
    output_name = read_sample.split('.')[0] + "_tophat_alignment"
    output_dir = os.path.join(directory, output_name)
    bowtie2_base = os.path.join(
        directory,
        handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))

    ### Adding advanced options to tophat command
    tophat_cmd = ' -p ' + str(num_threads)
    # (parameter name, command-line flag) pairs whose value is appended after
    # str() conversion when the parameter is present and not None.
    valued_options = (
        ('max_intron_length', ' -I '),
        ('min_intron_length', ' -i '),
        ('min_anchor_length', ' -a '),
        ('read_edit_dist', ' --read-edit-dist '),
        ('read_gap_length', ' --read-gap-length '),
        ('read_mismatches', ' -N '),
    )
    for key, flag in valued_options:
        if key in params and params[key] is not None:
            tophat_cmd += flag + str(params[key])
    if 'library_type' in params and params['library_type'] is not None:
        tophat_cmd += ' --library-type ' + params['library_type']
    # The two switches below are enabled only when their value equals 1.
    if ('report_secondary_alignments' in params
            and int(params['report_secondary_alignments']) == 1):
        tophat_cmd += ' --report-secondary-alignments'
    if ('no_coverage_search' in params
            and int(params['no_coverage_search']) == 1):
        tophat_cmd += ' --no-coverage-search'
    if 'preset_options' in params and params['preset_options'] is not None:
        tophat_cmd += ' --' + params['preset_options']

    shock_url = services['shock_service_url']
    if sample_type == 'KBaseAssembly.SingleEndLibrary':
        lib_type = 'SingleEnd'
        read_id = r_sample['data']['handle']['id']
        read_name = r_sample['data']['handle']['file_name']
        try:
            script_util.download_file_from_shock(
                logger,
                shock_service_url=shock_url,
                shock_id=read_id,
                filename=read_name,
                directory=directory,
                token=token)
            tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
                output_dir, gtf_file, bowtie2_base,
                os.path.join(directory, read_name))
        except Exception as e:
            # Keep the root cause in the log; the re-raised error only names
            # the file.
            logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0}".format(read_name))
    elif sample_type == 'KBaseAssembly.PairedEndLibrary':
        lib_type = 'PairedEnd'
        read1_id = r_sample['data']['handle_1']['id']
        read1_name = r_sample['data']['handle_1']['file_name']
        read2_id = r_sample['data']['handle_2']['id']
        read2_name = r_sample['data']['handle_2']['file_name']
        try:
            for shock_id, file_name in ((read1_id, read1_name),
                                        (read2_id, read2_name)):
                script_util.download_file_from_shock(
                    logger,
                    shock_service_url=shock_url,
                    shock_id=shock_id,
                    filename=file_name,
                    directory=directory,
                    token=token)
            tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
                output_dir, gtf_file, bowtie2_base,
                os.path.join(directory, read1_name),
                os.path.join(directory, read2_name))
        except Exception as e:
            # Keep the root cause in the log; the re-raised error only names
            # the files.
            logger.exception(e)
            raise Exception(
                "Unable to download shock file , {0} or {1}".format(
                    read1_name, read2_name))
Exemple #19
0
        bw_name =  bowtie_index['data']['handle']['file_name']
        genome_id = bowtie_index['data']['genome_id']
        annotation_gtf = ws_client.get_object_info([{"ref" :genome_id}],includeMetadata=None)[0][1]
        shared_files={}
        shared_files[bw_name] = bw_id
        script_util.download_shock_files(logger,self.urls['shock_service_url'],tophat_dir,shared_files,token)
        try:
            logger.info("Unzipping Bowtie2 Indices")
            script_util.unzip_files(logger,os.path.join(tophat_dir,bw_name),tophat_dir)
            mv_dir= handler_util.get_dir(tophat_dir)
            if mv_dir is not None:
                    script_util.move_files(logger,mv_dir,tophat_dir)
        except Exception, e:
               logger.error("".join(traceback.format_exc()))
               raise Exception("Unzip indexfile error: Please contact [email protected]")
        fasta_file =os.path.join(tophat_dir,(handler_util.get_file_with_suffix(tophat_dir,".fa")+".fa"))
        bowtie2base =os.path.join(tophat_dir,handler_util.get_file_with_suffix(tophat_dir,".rev.1.bt2"))

	### Check if GTF annotation object exist or skip this step
	### Check if the gtf object exists in the workspace
        ### Only run create_gtf_annotation if object doesnt exist
	ws_gtf = annotation_gtf+"_GTF_Annotation"
	ret = script_util.if_obj_exists(None,ws_client,params['ws_id'],"KBaseRNASeq.GFFAnnotation",[ws_gtf])
        if not ret is None:
            logger.info("GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(annotation_gtf))
	    annot_name,annot_id = ret[0]
            gtf_obj=ws_client.get_objects([{'ref' : annot_id}])[0]
            gtf_id=gtf_obj['data']['handle']['id']
            gtf_name=gtf_obj['data']['handle']['file_name']
            try:
               script_util.download_file_from_shock(logger, shock_service_url=self.urls['shock_service_url'], shock_id=gtf_id,filename=gtf_name, directory=tophat_dir,token=token)
Exemple #20
0
     token)
 try:
     logger.info("Unzipping Bowtie2 Indices")
     script_util.unzip_files(logger,
                             os.path.join(bowtie2_dir, bw_index_files),
                             bowtie2_dir)
     mv_dir = handler_util.get_dir(bowtie2_dir)
     if mv_dir is not None:
         script_util.move_files(logger, mv_dir, bowtie2_dir)
 except Exception, e:
     logger.error("".join(traceback.format_exc()))
     raise Exception("Unzip indexfile error")
 ### Build Index for the fasta file
 fasta_file = os.path.join(
     bowtie2_dir,
     handler_util.get_file_with_suffix(bowtie2_dir, ".fa") + ".fa")
 bowtie2base = os.path.join(
     bowtie2_dir, handler_util.get_file_with_suffix(bowtie2_dir, ".fa"))
 bowtie2base_cmd = '{0} {1}'.format(fasta_file, bowtie2base)
 try:
     logger.info(
         "Building Index for Hisat2 {0}".format(bowtie2base_cmd))
     cmdline_output = script_util.runProgram(logger, "bowtie2-build",
                                             bowtie2base_cmd, None,
                                             bowtie2_dir)
 except Exception, e:
     raise Exception(
         "Failed to run command {0}".format(bowtie2base_cmd))
 ### Check if GTF object exists in the workspace pull the gtf
 ref_id = bowtie_index['data']['genome_id']
 genome_name = ws_client.get_object_info_new(
Exemple #21
0
    def runEach(self, task_params):
        ws_client = self.common_params['ws_client']
        hs = self.common_params['hs_client']
        params = self.method_params
        logger = self.logger
        token = self.common_params['user_token']

        read_sample = task_params['job_id']
        condition = task_params['label']
        directory = task_params['tophat_dir']
        ws_id = task_params['ws_id']
        genome_id = task_params['annotation_id']
        sampleset_id = task_params['sampleset_id']
        gtf_file = task_params['gtf_file']

        print "Downloading Read Sample{0}".format(read_sample)
        logger.info("Downloading Read Sample{0}".format(read_sample))
        try:
            #r_sample = ws_client.get_objects(
            #                        [{ 'name' : read_sample, 'workspace' : ws_id}])[0]
            r_sample = script_util.ws_get_obj(logger, ws_client, ws_id,
                                              read_sample)[0]
            #r_sample_info = ws_client.get_object_info_new({"objects": [{'name': read_sample, 'workspace': ws_id}]})[0]
            #sample_type = r_sample_info[2].split('-')[0]
            sample_type = script_util.ws_get_type_name(logger, ws_client,
                                                       ws_id, read_sample)
            sample_name = script_util.ws_get_obj_name4file(
                self.logger, ws_client, ws_id, read_sample)
            output_name = sample_name.split('.')[0] + "_tophat_alignment"
            output_dir = os.path.join(directory, output_name)
            #if not os.path.exists(output_dir): os.makedirs(output_dir)
            #out_file = output_dir +"/accepted_hits.sam"
            bowtie2_base = os.path.join(
                directory,
                handler_util.get_file_with_suffix(directory, ".rev.1.bt2"))
            ### Adding advanced options to Bowtie2Call
            tophat_cmd = (' -p ' + str(self.num_threads))
            if ('max_intron_length' in params
                    and params['max_intron_length'] is not None):
                tophat_cmd += (' -I ' + str(params['max_intron_length']))
            if ('min_intron_length' in params
                    and params['min_intron_length'] is not None):
                tophat_cmd += (' -i ' + str(params['min_intron_length']))
            if ('min_anchor_length' in params
                    and params['min_anchor_length'] is not None):
                tophat_cmd += (' -a ' + str(params['min_anchor_length']))
            if ('read_edit_dist' in params
                    and params['read_edit_dist'] is not None):
                tophat_cmd += (' --read-edit-dist ' +
                               str(params['read_edit_dist']))
            if ('read_gap_length' in params
                    and params['read_gap_length'] is not None):
                tophat_cmd += (' --read-gap-length ' +
                               str(params['read_gap_length']))
            if ('read_mismatches' in params
                    and params['read_mismatches'] is not None):
                tophat_cmd += (' -N ' + str(params['read_mismatches']))
            if ('library_type' in params
                    and params['library_type'] is not None):
                tophat_cmd += (' --library-type ' + params['library_type'])
            if ('report_secondary_alignments' in params
                    and int(params['report_secondary_alignments']) == 1):
                tophat_cmd += ' --report-secondary-alignments'
            if ('no_coverage_search' in params
                    and int(params['no_coverage_search']) == 1):
                tophat_cmd += ' --no-coverage-search'
            if ('preset_options' in params
                    and params['preset_options'] is not None):
                tophat_cmd += ' --' + params['preset_options']
            #out_file = output_dir +"/accepted_hits.sam"
            try:
                sample_ref = script_util.ws_get_ref(self.logger, ws_client,
                                                    ws_id, read_sample)
                ds = script_util.ru_reads_download(self.logger, sample_ref,
                                                   directory, token)
            except Exception, e:
                self.logger.exception(e)
                raise Exception(
                    "Unable to download reads file , {0}".format(read_sample))
            if sample_type == 'KBaseAssembly.SingleEndLibrary' or sample_type == 'KBaseFile.SingleEndLibrary':
                lib_type = 'SingleEnd'
                tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(
                    output_dir, gtf_file, bowtie2_base, ds['fwd'])
            if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
                lib_type = 'PairedEnd'
                if sample_type == 'KBaseAssembly.PairedEndLibrary':
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        tophat_cmd += (' --' + params['orientation'])
                else:
                    # TODO: the following can be read from PEL object
                    if ('orientation' in params
                            and params['orientation'] is not None):
                        tophat_cmd += (' --' + params['orientation'])
                tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(
                    output_dir, gtf_file, bowtie2_base, ds['fwd'], ds['rev'])

#                if sample_type  == 'KBaseAssembly.SingleEndLibrary' or sample_type  == 'KBaseFile.SingleEndLibrary':
#                        lib_type = 'SingleEnd'
#                        if sample_type == 'KBaseAssembly.SingleEndLibrary':
#                            read_id = r_sample['data']['handle']['id']
#                            read_name =  r_sample['data']['handle']['file_name']
#                        else:
#                            read_id = r_sample['data']['lib']['file']['id']
#                            read_name =  r_sample['data']['lib']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read_id,filename=read_name, directory=directory,token=token)
#                		tophat_cmd += ' -o {0} -G {1} {2} {3}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read_name))
#                        except Exception,e:
#                                self.logger.exception(e)
#                                raise Exception( "Unable to download shock file , {0}".format(read_name))
#                if sample_type == 'KBaseAssembly.PairedEndLibrary' or sample_type == 'KBaseFile.PairedEndLibrary':
#                        lib_type = 'PairedEnd'
#                        if sample_type == 'KBaseAssembly.PairedEndLibrary':
#                            if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['handle_1']['id']
#                            read1_name = r_sample['data']['handle_1']['file_name']
#                            read2_id = r_sample['data']['handle_2']['id']
#                            read2_name = r_sample['data']['handle_2']['file_name']
#                        else:
#                            # TODO: the following can be read from PEL object
#                            if('orientation' in params and params['orientation'] is not None): tophat_cmd += ( ' --'+params['orientation'])
#                            read1_id = r_sample['data']['lib1']['file']['id']
#                            read1_name = r_sample['data']['lib1']['file']['file_name']
#                            read2_id = r_sample['data']['lib2']['file']['id']
#                            read2_name = r_sample['data']['lib2']['file']['file_name']
#                        try:
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read1_id,filename=read1_name, directory=directory,token=token)
#                                script_util.download_file_from_shock(self.logger, shock_service_url=self.urls['shock_service_url'], shock_id=read2_id,filename=read2_name, directory=directory,token=token)
#                		tophat_cmd += ' -o {0} -G {1} {2} {3} {4}'.format(output_dir,gtf_file,bowtie2_base,os.path.join(directory,read1_name),os.path.join(directory,read2_name))
#                        except Exception,e:
#                                raise Exception( "Unable to download shock file , {0} or {1}".format(read1_name,read2_name))
            try:
                self.logger.info("Executing: tophat {0}".format(tophat_cmd))
                cmdline_output, cmd_err = script_util.runProgram(
                    self.logger, "tophat", tophat_cmd, None, directory)
            except Exception, e:
                raise Exception("Failed to run command {0}\n{1}\n{2}".format(
                    tophat_cmd, cmdline_output, cmd_err))