Python create_sanitized_contig_ids 예제들, contig_id_mapping.create_sanitized_contig_ids Python 예제들

예제 #1

0

파일 보기

def create_gtf_annotation(logger,ws_client,hs_client,internal_services,ws_id,genome_ref,genome_id,fasta_file,directory,token):
    """Generate a GTF annotation for a genome and save it to the workspace.

    Steps, in order:
      1. Generate the genome FASTA via ``generate_fasta`` and rewrite its
         contig ids through ``c_mapping``.
      2. Generate a GFF via ``generate_gff`` and apply the same contig-id
         mapping to it.
      3. Convert the GFF to GTF by running ``gffread``.
      4. Upload the GTF through ``hs_client`` and save a
         ``KBaseRNASeq.GFFAnnotation`` object named
         ``<genome_id>_GTF_Annotation`` in workspace ``ws_id``.

    Args:
        logger: logger used for progress messages.
        ws_client: workspace client; must provide ``save_objects``.
        hs_client: handle-service client; must provide ``upload``.
        internal_services: passed through to ``generate_fasta`` /
            ``generate_gff``.
        ws_id: workspace the annotation object is saved to.
        genome_ref: reference of the source genome.
        genome_id: used to build the output file names and object name.
        fasta_file: ignored -- kept for interface compatibility; the value
            is overwritten by the path returned from ``generate_fasta``.
        directory: working directory for the generated files.
        token: auth token passed through to the helper functions.

    Returns:
        Path of the generated GTF file (``<directory>/<genome_id>_GTF.gtf``).

    Raises:
        ValueError: if any step fails; the message carries the formatted
            traceback of the underlying error.
    """
    try:
        tmp_file = os.path.join(directory, genome_id + "_GFF.gff")
        # The incoming ``fasta_file`` argument is deliberately not read: the
        # FASTA is always regenerated from the genome reference.
        fasta_file = generate_fasta(logger, internal_services, token, genome_ref, directory, genome_id)
        logger.info("Sanitizing the fasta file to correct id names {}".format(datetime.datetime.utcnow()))
        mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file)
        c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True)
        logger.info("Generating FASTA file completed successfully : {}".format(datetime.datetime.utcnow()))

        generate_gff(logger, internal_services, token, genome_ref, directory, genome_id, tmp_file)
        # The GFF must use the same modified contig ids as the FASTA.
        c_mapping.replace_gff_contig_ids(tmp_file, mapping_filename, to_modified=True)

        gtf_path = os.path.join(directory, genome_id + "_GTF.gtf")
        gtf_cmd = " -E {0} -T -o {1}".format(tmp_file, gtf_path)
        try:
            logger.info("Executing: gffread {0}".format(gtf_cmd))
            runProgram(None, "gffread", gtf_cmd, None, directory)
        except Exception:
            raise Exception("Error Converting the GFF file to GTF using gffread {0},{1}".format(gtf_cmd, "".join(traceback.format_exc())))
        # The intermediate GFF file (tmp_file) is not removed.

        # Without this guard a missing output file would only show up later
        # as an undefined-variable error when building the workspace object.
        if not os.path.exists(gtf_path):
            raise Exception("gffread did not produce the expected GTF file {0}".format(gtf_path))

        annotation_handle = hs_client.upload(gtf_path)
        a_handle = {
            "handle": annotation_handle,
            "size": os.path.getsize(gtf_path),
            'genome_id': genome_ref,
        }
        # Save the GFF/GTF annotation to the workspace.
        ws_client.save_objects({
            "workspace": ws_id,
            "objects": [{
                "type": "KBaseRNASeq.GFFAnnotation",
                "data": a_handle,
                "name": genome_id + "_GTF_Annotation",
                "hidden": 1,
            }],
        })
    except Exception:
        raise ValueError("Generating GTF file from Genome Annotation object Failed :  {}".format("".join(traceback.format_exc())))
    return gtf_path

예제 #2

0

파일 보기

    def _create_gtf_annotation_from_genome(self, genome_ref, result_directory):
        """
         Create reference annotation file from genome
        """
        ref = self.ws.get_object_subset([{
            'ref':
            genome_ref,
            'included': ['contigset_ref', 'assembly_ref']
        }])
        if 'contigset_ref' in ref[0]['data']:
            contig_id = ref[0]['data']['contigset_ref']
        elif 'assembly_ref' in ref[0]['data']:
            contig_id = ref[0]['data']['assembly_ref']
        if contig_id is None:
            raise ValueError(
                "Genome at {0} does not have reference to the assembly object".
                format(genome_ref))
        print contig_id
        log("Generating GFF file from Genome")
        try:
            ret = self.au.get_assembly_as_fasta({'ref': contig_id})
            fa_output_file = ret['path']

            shutil.copy(fa_output_file, result_directory)
            fa_output_name = os.path.basename(fa_output_file)
            fa_output_file = os.path.join(result_directory, fa_output_name)

            mapping_filename = c_mapping.create_sanitized_contig_ids(
                fa_output_file)

            # get the GFF
            ret = self.gfu.genome_to_gff({
                'genome_ref': genome_ref,
                'target_dir': result_directory
            })
            genome_gff_file = ret['file_path']
            c_mapping.replace_gff_contig_ids(genome_gff_file,
                                             mapping_filename,
                                             to_modified=True)
            gtf_ext = ".gtf"

            if not genome_gff_file.endswith(gtf_ext):
                gtf_path = os.path.splitext(genome_gff_file)[0] + '.gtf'
                self._run_gffread(genome_gff_file, gtf_path)
            else:
                gtf_path = genome_gff_file

            log("gtf file : " + gtf_path)
        except Exception:
            raise ValueError(
                "Generating GTF file from Genome Annotation object Failed :  {}"
                .format("".join(traceback.format_exc())))

        return gtf_path

예제 #3

0

파일 보기

파일: call_hisat2.py 프로젝트: ugswork/KBaseRNASeq

        e_ws_objs = script_util.if_ws_obj_exists(None, ws_client,
                                                 params['ws_id'], r_type,
                                                 reads)
        missing_objs = [i for i in reads if not i in e_ws_objs]
        if len(e_ws_objs) != len(reads):
            raise Exception(
                'Missing Library objects {0} in the {1}. please copy them and run this method'
                .format(",".join(missing_objs), params['ws_id']))

    ### Build Hisat2 index
    fasta_file = script_util.generate_fasta(logger, services, token,
                                            annotation_id, hisat2_dir,
                                            params['genome_id'])
    logger.info("Sanitizing the fasta file to correct id names {}".format(
        datetime.datetime.utcnow()))
    mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file)
    c_mapping.replace_fasta_contig_ids(fasta_file,
                                       mapping_filename,
                                       to_modified=True)
    logger.info("Generating FASTA file completed successfully : {}".format(
        datetime.datetime.utcnow()))
    hisat2base = os.path.join(
        hisat2_dir, handler_util.get_file_with_suffix(hisat2_dir, ".fasta"))
    hisat2base_cmd = '{0} {1}'.format(fasta_file, hisat2base)
    try:
        logger.info("Building Index for Hisat2 {0}".format(hisat2base_cmd))
        cmdline_output = script_util.runProgram(logger, "hisat2-build",
                                                hisat2base_cmd, None,
                                                hisat2_dir)
    except Exception, e:
        raise Exception("Failed to run command {0}".format(hisat2base_cmd))

예제 #4

0

파일 보기

파일: call_diffExpCallforBallgown.py 프로젝트: briehl/KBaseRNASeq

            if not ret is None:
                logger.info("GFF Annotation Exist for Genome Annotation {0}.... Skipping step ".format(annotation_name))
                gtf_obj= ws_client.get_objects([{'name' : gtf_obj_name,'workspace' : params['ws_id']}])[0]
                gtf_info = ws_client.get_object_info_new({"objects": [{'name': gtf_obj_name, 'workspace': params['ws_id']}]})[0]
                gtf_annotation_id = str(gtf_info[6]) + '/' + str(gtf_info[0]) + '/' + str(gtf_info[4])
                gtf_id=gtf_obj['data']['handle']['id']
                gtf_name=gtf_obj['data']['handle']['file_name']
                try:
                     script_util.download_file_from_shock(logger, shock_service_url=services['shock_service_url'], shock_id=gtf_id,filename=gtf_name, directory=diffexp_dir,token=token)
                     gtf_file = os.path.join(diffexp_dir,gtf_name)
                except Exception,e:
                     raise Exception( "Unable to download shock file, {0}".format(gtf_name))
            else:
                fasta_file= script_util.generate_fasta(logger,services,token,annotation_id,diffexp_dir,annotation_name)
                logger.info("Sanitizing the fasta file to correct id names {}".format(datetime.datetime.utcnow()))
                mapping_filename = c_mapping.create_sanitized_contig_ids(fasta_file)
                c_mapping.replace_fasta_contig_ids(fasta_file, mapping_filename, to_modified=True)
                logger.info("Generating FASTA file completed successfully : {}".format(datetime.datetime.utcnow()))
                gtf_file = script_util.create_gtf_annotation(logger,ws_client,hs,services,params['ws_id'],annotation_id,gtf_obj_name,fasta_file,diffexp_dir,token)
            m_expr_ids = e_sample['data']['mapped_expression_ids']
	    m_align_exp = []
            labels = []
            expressions = []
            counter = 0
            assembly_file = os.path.join(diffexp_dir,ASSEMBLY_GTF_FN)
            list_file = open(assembly_file,'w')
            for i in m_expr_ids:
                for a_id ,e_id in i.items():
                        #print a_id  + ":" + e_id
                        files = {}
                        a_obj,e_obj = ws_client.get_objects(