# ---------------------------------------------------------------------------
# Run set-up: parse the exon-number option, start the wall-clock timer and
# call make_dir() on the output directory.
# ---------------------------------------------------------------------------
exon_num = int(options.exon_num)
start_time = time.time()
print("Run start:")

make_dir(out_dir)
# NOTE(review): the separate 'gtf' sub-directory below was already disabled
# in the original script; kept verbatim for reference.
#gtf_outdir=out_dir+'/'+'gtf'
#tid_gid_file=gtf_outdir+"/"+"tid_gid"
#make_dir(gtf_outdir)

# Output file paths, all located directly under out_dir.
snc_gtf = "/".join((out_dir, 'filter_out_noncoding.gtf'))
lnc_gtf = "/".join((out_dir, 'novel_lincRNA.gtf'))
coding_gtf = "/".join((out_dir, 'novel_coding.gtf'))
cnc_gtf = "/".join((out_dir, 'ambiguous_genes.gtf'))
Gene_Info = out_dir + '/compare_2_infor.txt'

# ---------------------------------------------------------------------------
# Load the input GTF and fetch its tid/gid table
# (presumably transcript id -> gene id; confirm against Gtf.get_tid_gid).
# ---------------------------------------------------------------------------
gtf = Gtf(gtf_input)
tid_gid = gtf.get_tid_gid()

# NOTE(review): an earlier index-transformation path, already disabled in the
# original script; kept verbatim for reference.
#index_content=Table(index_file,0,False)
#index_content.write_to_file('index_content')
#transform_index_content=index_content.cal(index_transform,'row')
#transform_index=Table.build_table(1,['tid','cnc_type','score','start','end','length'],transform_index_content)

# Read the index file as a Table and left-join the tid/gid table onto it
# (the meaning of the positional arguments 1 / True is defined by Table,
# outside this view).
transform_index = Table(index_file, 1, True)
transform_index.left_join(1, tid_gid, 1)

# Take the 'gid' and 'index' columns, then pass the result through
# de_redundency() and key_by('gid', 'index').
gid_cnctype = (
    transform_index
    .get_col('gid', 'index')
    .de_redundency()
    .key_by('gid', 'index')
)
# NOTE(review): this line opens with the tail of a function whose header lies
# outside the visible source. The visible branches return overlap_noncoding
# when it is not the empty string and "-" otherwise; the enclosing conditions
# cannot be seen from here.
#
# The remainder is the script's top-level classification flow:
#   1. fetch_args() supplies the coding reference input, the noncoding
#      reference input, the output directory and the combined GTF input.
#   2. prepare_dirs(out_dir) returns the cuffcompare and gtf output directories.
#   3. fetch_gtfs(...) returns coding_ref, noncoding_ref and combined_gtf.
#   4. The combined GTF input is read with Gtf.simple_read(); its tid/gid table
#      is re-keyed with key_by([2], [1]) and column 1 of the result is used as
#      the gene list, whose length is printed.
#   5. cuffcompare() is called three times: with the raw reference inputs and
#      the raw combined GTF input (output prefixes '/strandc' and '/strandnc'),
#      and with coding_ref / combined_gtf from fetch_gtfs (prefix '/coding').
#      After each call the returned table's 'cuff_gene_id' column is updated
#      with gtf.get_gid() applied to its 'cuff_id' column.
# This is Python 2 code (print statements).
elif overlap_noncoding!="": return overlap_noncoding else: return "-" else: return "-" (coding_ref_input,noncoding_ref_input,out_dir,combined_gtf_input)=fetch_args() start_time=time.time() print "Classification start:" (cuffcompare_outdir,gtf_outdir)=prepare_dirs(out_dir) (coding_ref,noncoding_ref,combined_gtf)=fetch_gtfs( coding_ref_input, noncoding_ref_input, combined_gtf_input, cuffcompare_outdir) gtf=Gtf.simple_read(combined_gtf_input) tid_gid=gtf.get_tid_gid() gid_tid=tid_gid.key_by([2],[1]) all_gids=gid_tid.getCol(1) print "Input genes: ", len(all_gids) print "Running Cuffcompare ..." strand_coding_tmap=cuffcompare(coding_ref_input,combined_gtf_input,cuffcompare_outdir+'/strandc') strand_coding_tmap.update_col('cuff_gene_id',gtf.get_gid(strand_coding_tmap.getCol('cuff_id'))) strand_noncoding_tmap=cuffcompare(noncoding_ref_input,combined_gtf_input,cuffcompare_outdir+'/strandnc') strand_noncoding_tmap.update_col('cuff_gene_id',gtf.get_gid(strand_noncoding_tmap.getCol('cuff_id'))) coding_tmap=cuffcompare(coding_ref,combined_gtf,cuffcompare_outdir+'/coding') coding_tmap.update_col('cuff_gene_id',gtf.get_gid(coding_tmap.getCol('cuff_id')))
# ---------------------------------------------------------------------------
# Run set-up: parse the exon-number option, start the wall-clock timer and
# call make_dir() on the output directory.
# ---------------------------------------------------------------------------
exon_num = int(options.exon_num)
start_time = time.time()
print("Run start:")

make_dir(out_dir)
# NOTE(review): the separate 'gtf' sub-directory below was already disabled
# in the original script; kept verbatim for reference.
#gtf_outdir=out_dir+'/'+'gtf'
#tid_gid_file=gtf_outdir+"/"+"tid_gid"
#make_dir(gtf_outdir)

# Output file paths, all located directly under out_dir.
snc_gtf = "/".join((out_dir, 'filter_out_noncoding.gtf'))
lnc_gtf = "/".join((out_dir, 'novel_lincRNA.gtf'))
coding_gtf = "/".join((out_dir, 'novel_coding.gtf'))
cnc_gtf = "/".join((out_dir, 'ambiguous_genes.gtf'))
Gene_Info = out_dir + '/compare_2_infor.txt'

# ---------------------------------------------------------------------------
# Load the input GTF and fetch its tid/gid table
# (presumably transcript id -> gene id; confirm against Gtf.get_tid_gid).
# ---------------------------------------------------------------------------
gtf = Gtf(gtf_input)
tid_gid = gtf.get_tid_gid()

# NOTE(review): an earlier index-transformation path, already disabled in the
# original script; kept verbatim for reference.
#index_content=Table(index_file,0,False)
#index_content.write_to_file('index_content')
#transform_index_content=index_content.cal(index_transform,'row')
#transform_index=Table.build_table(1,['tid','cnc_type','score','start','end','length'],transform_index_content)

# Read the index file as a Table and left-join the tid/gid table onto it
# (the meaning of the positional arguments 1 / True is defined by Table,
# outside this view).
transform_index = Table(index_file, 1, True)
transform_index.left_join(1, tid_gid, 1)

# Take the 'gid' and 'index' columns and pass them through de_redundency().
gid_cnctype = transform_index.get_col('gid', 'index').de_redundency()
# NOTE(review): this line opens with the tail of a function whose header lies
# outside the visible source. The visible branches return overlap_noncoding
# or "-"; the conditions selecting between them cannot be seen from here.
#
# The remainder is the script's top-level classification flow:
#   1. fetch_args() supplies the coding reference input, the noncoding
#      reference input, the output directory and the combined GTF input.
#   2. prepare_dirs(out_dir) returns the cuffcompare and gtf output directories.
#   3. fetch_gtfs(...) returns coding_ref, noncoding_ref and combined_gtf.
#   4. The combined GTF input is read with Gtf.simple_read(); its tid/gid table
#      is re-keyed with key_by([2], [1]) and column 1 of the result is used as
#      the gene list, whose length is printed.
#   5. cuffcompare() is called with the raw coding and noncoding reference
#      inputs against the raw combined GTF input (output prefixes '/strandc'
#      and '/strandnc'). After each call the returned table's 'cuff_gene_id'
#      column is updated with gtf.get_gid() applied to its 'cuff_id' column.
# This is Python 2 code (print statements).
return overlap_noncoding else: return "-" else: return "-" (coding_ref_input, noncoding_ref_input, out_dir, combined_gtf_input) = fetch_args() start_time = time.time() print "Classification start:" (cuffcompare_outdir, gtf_outdir) = prepare_dirs(out_dir) (coding_ref, noncoding_ref, combined_gtf) = fetch_gtfs(coding_ref_input, noncoding_ref_input, combined_gtf_input, cuffcompare_outdir) gtf = Gtf.simple_read(combined_gtf_input) tid_gid = gtf.get_tid_gid() gid_tid = tid_gid.key_by([2], [1]) all_gids = gid_tid.getCol(1) print "Input genes: ", len(all_gids) print "Running Cuffcompare ..." strand_coding_tmap = cuffcompare(coding_ref_input, combined_gtf_input, cuffcompare_outdir + '/strandc') strand_coding_tmap.update_col( 'cuff_gene_id', gtf.get_gid(strand_coding_tmap.getCol('cuff_id'))) strand_noncoding_tmap = cuffcompare(noncoding_ref_input, combined_gtf_input, cuffcompare_outdir + '/strandnc') strand_noncoding_tmap.update_col( 'cuff_gene_id', gtf.get_gid(strand_noncoding_tmap.getCol('cuff_id')))