gtf_name = gtf_obj['data']['handle']['file_name'] try: script_util.download_file_from_shock( logger, shock_service_url=services['shock_service_url'], shock_id=gtf_id, filename=gtf_name, directory=hisat2_dir, token=token) gtf_file = os.path.join(hisat2_dir, gtf_name) except Exception, e: raise Exception( "Unable to download shock file, {0}".format(gtf_name)) else: script_util.create_gtf_annotation(logger, ws_client, hs, services, params['ws_id'], annotation_id, params['genome_id'], fasta_file, hisat2_dir, token) # Determine the num_threads provided by the user otherwise default the number of threads to 2 if ('num_threads' in params and params['num_threads'] is not None): num_threads = int(params['num_threads']) else: num_threads = 2 num_cores = mp.cpu_count() logger.info("Number of available cores : {0}".format(num_cores)) b_tasks = [] if sample_type == 'KBaseRNASeq.RNASeqSampleSet': reads = sample['data']['sample_ids'] reads_type = sample['data']['Library_type'] r_label = sample['data']['condition'] num_samples = len(reads) if num_cores != 1:
"Unable to download shock file, {0}".format(gtf_name)) else: fasta_file = script_util.generate_fasta(logger, services, token, annotation_id, diffexp_dir, annotation_name) logger.info("Sanitizing the fasta file to correct id names {}".format( datetime.datetime.utcnow())) mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file) c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True) logger.info("Generating FASTA file completed successfully : {}".format( datetime.datetime.utcnow())) gtf_file = script_util.create_gtf_annotation(logger, ws_client, hs, services, params['ws_id'], annotation_id, gtf_obj_name, fasta_file, diffexp_dir, token) m_expr_ids = e_sample['data']['mapped_expression_ids'] m_align_exp = [] labels = [] expressions = [] counter = 0 assembly_file = os.path.join(diffexp_dir, ASSEMBLY_GTF_FN) list_file = open(assembly_file, 'w') for i in m_expr_ids: for a_id, e_id in i.items(): #print a_id + ":" + e_id files = {} a_obj, e_obj = ws_client.get_objects([{ 'ref': a_id
gtf_info = ws_client.get_object_info_new({"objects": [{'name': gtf_obj_name, 'workspace': params['ws_id']}]})[0] gtf_annotation_id = str(gtf_info[6]) + '/' + str(gtf_info[0]) + '/' + str(gtf_info[4]) gtf_id=gtf_obj['data']['handle']['id'] gtf_name=gtf_obj['data']['handle']['file_name'] try: script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=gtf_id,filename=gtf_name, directory=diffexp_dir,token=token) gtf_file = os.path.join(diffexp_dir,gtf_name) except Exception,e: raise Exception( "Unable to download shock file, {0}".format(gtf_name)) else: fasta_file= script_util.generate_fasta(logger,services,token,annotation_id,diffexp_dir,annotation_name) logger.info("Sanitizing the fasta file to correct id names {}".format(datetime.datetime.utcnow())) mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file) c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True) logger.info("Generating FASTA file completed successfully : {}".format(datetime.datetime.utcnow())) gtf_file = script_util.create_gtf_annotation(logger,ws_client,hs,services,params['ws_id'],annotation_id,gtf_obj_name,fasta_file,diffexp_dir,token) m_expr_ids = e_sample['data']['mapped_expression_ids'] m_align_exp = [] labels = [] expressions = [] counter = 0 assembly_file = os.path.join(diffexp_dir,ASSEMBLY_GTF_FN) list_file = open(assembly_file,'w') for i in m_expr_ids: for a_id ,e_id in i.items(): #print a_id + ":" + e_id files = {} a_obj,e_obj = ws_client.get_objects( [{'ref' : a_id},{'ref': e_id}]) ### Get the condition name, replicate_id , shock_id and shock_filename condition = a_obj['data']['condition']
# Obtain the GTF annotation for the genome: reuse the GFFAnnotation object
# already stored in the workspace under "<genome_id>_GTF" when there is one,
# otherwise generate it.  Then settle the thread count and, for sample sets,
# the parallel-run layout.
ws_gtf = params['genome_id'] + "_GTF"
ret = script_util.if_obj_exists(
    None, ws_client, params['ws_id'], "KBaseRNASeq.GFFAnnotation", [ws_gtf])
# Debug output kept from the original; with a single argument this call form
# prints the same text on Python 2 and Python 3.
print(ret)
if ret is not None:
    logger.info(
        "GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(
            params['genome_id']))
    # Each entry of ret unpacks to a (name, id) pair; only the id is needed.
    _, annot_id = ret[0]
    gtf_obj = ws_client.get_objects([{'ref': annot_id}])[0]
    gtf_handle = gtf_obj['data']['handle']
    gtf_id = gtf_handle['id']
    gtf_name = gtf_handle['file_name']
    try:
        script_util.download_file_from_shock(
            logger,
            shock_service_url=services['shock_service_url'],
            shock_id=gtf_id,
            filename=gtf_name,
            directory=hisat2_dir,
            token=token)
        gtf_file = os.path.join(hisat2_dir, gtf_name)
    except Exception:
        # NOTE: the underlying error is discarded here, as in the original;
        # callers only ever see this generic message.
        raise Exception(
            "Unable to download shock file, {0}".format(gtf_name))
else:
    # No stored annotation: build one.  The result is bound to gtf_file so the
    # name is defined on both branches.
    # NOTE(review): assumes create_gtf_annotation returns the GTF path, as the
    # equivalent call in the differential-expression code path does - confirm.
    gtf_file = script_util.create_gtf_annotation(
        logger, ws_client, hs, services, params['ws_id'], annotation_id,
        params['genome_id'], fasta_file, hisat2_dir, token)

# Determine the num_threads provided by the user otherwise default the number
# of threads to 2
if 'num_threads' in params and params['num_threads'] is not None:
    num_threads = int(params['num_threads'])
else:
    num_threads = 2
num_cores = mp.cpu_count()
logger.info("Number of available cores : {0}".format(num_cores))
b_tasks = []
if sample_type == 'KBaseRNASeq.RNASeqSampleSet':
    reads = sample['data']['sample_ids']
    reads_type = sample['data']['Library_type']
    r_label = sample['data']['condition']
    num_samples = len(reads)
    if num_cores != 1:
        # More than one core: let the helper pick the pool size and the
        # per-task thread count for this number of samples.
        pool_size, num_threads = handler_util.optimize_parallel_run(
            num_samples, num_threads, num_cores)