# Closing steps of the script: sort the gene ids into classes, write the
# gene/class table and one GTF per class, then report the elapsed time.
# NOTE(review): what the project helpers used below (left_join,
# get_row_by_func, de_redundency, intersect_array, sub_array, classify,
# sub_gtf, get_tid) actually do is inferred from their names only; their
# definitions are not visible from here -- confirm.

# Join the exon data from the GTF onto the transcript index, then keep only
# the rows selected by check_lncRNA.
exon_number = gtf.getExon()
transform_index.left_join(1, exon_number, 1)
transform_index = transform_index.get_row_by_func(check_lncRNA)

# Every gene id known to the transcript -> gene table.
all_gids = de_redundency(tid_gid.getCol('gid'))

# A single pre-formatted string argument prints the same bytes under the
# Python 2 print statement and the Python 3 print function.
C = coding.getCol('gid')
print("Novel coding genes: %d" % len(C))

N = noncoding.getCol('gid')

AM = ambiguity.getCol('gid')
print("Ambiguous genes: %d" % len(AM))

# lncRNA candidates: gene ids left in the filtered index, restricted to the
# ids that also appear in the noncoding set.
lnc = de_redundency(transform_index.getCol('gid'))
lnc = intersect_array(N, lnc)
print("Novel lincRNA genes: %d" % len(lnc))

# Whatever is not coding, ambiguous or lncRNA ends up in the discard set.
discard = sub_array(all_gids, C)
discard = sub_array(discard, AM)
discard = sub_array(discard, lnc)
print("Filter out noncoding genes: %d" % len(discard))

# Gene id -> class table.
gene_class = classify(C, lnc, AM, discard)
gene_class.set_colnames('gene_id', 'class')
gene_class.write_to_file(Gene_Info)

# One GTF per class, selected through the transcript ids of each gene list.
gtf.sub_gtf(gtf.get_tid(discard)).write_to_file(snc_gtf)
gtf.sub_gtf(gtf.get_tid(lnc)).write_to_file(lnc_gtf)
gtf.sub_gtf(gtf.get_tid(C)).write_to_file(coding_gtf)
gtf.sub_gtf(gtf.get_tid(AM)).write_to_file(cnc_gtf)

run_time = int(time.time() - start_time)

# Passing a string to exit() prints it to stderr and terminates with status 1,
# which reports a successful run as a failure. Emit the same message on stderr
# and exit with status 0 instead.
import sys  # local import: the file's import block is not visible from here

completion_message = "Run complete: " + "%d seconds elapsed " % run_time
sys.stderr.write(completion_message + "\n")
sys.exit(0)
# NOTE(review): these statements repeat the classification/report sequence
# that immediately precedes them in the file, and that sequence ends by
# exiting the interpreter, so this copy looks unreachable -- confirm and
# consider removing it.
# NOTE(review): what the project helpers used below (get_row_by_func,
# de_redundency, intersect_array, sub_array, classify, sub_gtf, get_tid)
# actually do is inferred from their names only; their definitions are not
# visible from here -- confirm.

# Keep only the transcript-index rows selected by check_lncRNA.
transform_index = transform_index.get_row_by_func(check_lncRNA)

# Every gene id known to the transcript -> gene table.
all_gids = de_redundency(tid_gid.getCol('gid'))

# A single pre-formatted string argument prints the same bytes under the
# Python 2 print statement and the Python 3 print function.
C = coding.getCol('gid')
print("Novel coding genes: %d" % len(C))

N = noncoding.getCol('gid')

AM = ambiguity.getCol('gid')
print("Ambiguous genes: %d" % len(AM))

# lncRNA candidates: gene ids left in the filtered index, restricted to the
# ids that also appear in the noncoding set.
lnc = de_redundency(transform_index.getCol('gid'))
lnc = intersect_array(N, lnc)
print("Novel lincRNA genes: %d" % len(lnc))

# Whatever is not coding, ambiguous or lncRNA ends up in the discard set.
discard = sub_array(all_gids, C)
discard = sub_array(discard, AM)
discard = sub_array(discard, lnc)
print("Filter out noncoding genes: %d" % len(discard))

# Gene id -> class table.
gene_class = classify(C, lnc, AM, discard)
gene_class.set_colnames('gene_id', 'class')
gene_class.write_to_file(Gene_Info)

# One GTF per class, selected through the transcript ids of each gene list.
gtf.sub_gtf(gtf.get_tid(discard)).write_to_file(snc_gtf)
gtf.sub_gtf(gtf.get_tid(lnc)).write_to_file(lnc_gtf)
gtf.sub_gtf(gtf.get_tid(C)).write_to_file(coding_gtf)
gtf.sub_gtf(gtf.get_tid(AM)).write_to_file(cnc_gtf)

run_time = int(time.time() - start_time)

# Passing a string to exit() prints it to stderr and terminates with status 1,
# which reports a successful run as a failure. Emit the same message on stderr
# and exit with status 0 instead.
import sys  # local import: the file's import block is not visible from here

completion_message = "Run complete: " + "%d seconds elapsed " % run_time
sys.stderr.write(completion_message + "\n")
sys.exit(0)