예제 #1
0
# Sort the assembled genes into coding / lincRNA / ambiguous / discarded
# groups, write the per-gene class table plus one GTF file per group, and
# finish by reporting the elapsed run time.

# Join the result of gtf.getExon() onto the transcript index.
# NOTE(review): the return value of left_join is discarded; presumably it
# updates transform_index in place -- confirm against its definition.
exon_number = gtf.getExon()
transform_index.left_join(1, exon_number, 1)

# Keep only the rows accepted by check_lncRNA.
transform_index = transform_index.get_row_by_func(check_lncRNA)

# Collect the 'gid' column of every input table.  Each count is reported with
# a single-argument print(...) so the emitted text is the same under
# Python 2 and Python 3.
all_gids = de_redundency(tid_gid.getCol('gid'))
C = coding.getCol('gid')
print("%s %d" % ("Novel coding genes:", len(C)))
N = noncoding.getCol('gid')
AM = ambiguity.getCol('gid')
print("%s %d" % ("Ambiguous genes:", len(AM)))

# lincRNA candidates: gene ids that survived the check_lncRNA filter,
# combined with the noncoding list through intersect_array.
lnc = de_redundency(transform_index.getCol('gid'))
lnc = intersect_array(N, lnc)
print("%s %d" % ("Novel lincRNA genes:", len(lnc)))

# Whatever is left of all_gids after removing the three groups above
# (presumably sub_array(a, b) returns a without the members of b -- confirm).
discard = sub_array(all_gids, C)
discard = sub_array(discard, AM)
discard = sub_array(discard, lnc)
print("%s %d" % ("Filter out noncoding genes:", len(discard)))

# Per-gene class table.
gene_class = classify(C, lnc, AM, discard)
gene_class.set_colnames('gene_id', 'class')
gene_class.write_to_file(Gene_Info)

# One GTF per group, selected through the transcript ids of its genes.
gtf.sub_gtf(gtf.get_tid(discard)).write_to_file(snc_gtf)
gtf.sub_gtf(gtf.get_tid(lnc)).write_to_file(lnc_gtf)
gtf.sub_gtf(gtf.get_tid(C)).write_to_file(coding_gtf)
gtf.sub_gtf(gtf.get_tid(AM)).write_to_file(cnc_gtf)

run_time = int(time.time() - start_time)
# exit("<text>") prints the text to stderr but terminates with status 1, which
# makes a successful run look like a failure to the calling shell/pipeline.
# Emit the same message on stderr and exit with status 0 instead.
import sys
completion_message = "Run complete: " + "%d seconds elapsed " % run_time
sys.stderr.write(completion_message + "\n")
sys.exit(0)
예제 #2
0
# Sort the assembled genes into coding / lincRNA / ambiguous / discarded
# groups, write the per-gene class table plus one GTF file per group, and
# finish by reporting the elapsed run time.

# Keep only the rows accepted by check_lncRNA.
transform_index = transform_index.get_row_by_func(check_lncRNA)

# Collect the 'gid' column of every input table.  Each count is reported with
# a single-argument print(...) so the emitted text is the same under
# Python 2 and Python 3.
all_gids = de_redundency(tid_gid.getCol('gid'))
C = coding.getCol('gid')
print("%s %d" % ("Novel coding genes:", len(C)))
N = noncoding.getCol('gid')
AM = ambiguity.getCol('gid')
print("%s %d" % ("Ambiguous genes:", len(AM)))

# lincRNA candidates: gene ids that survived the check_lncRNA filter,
# combined with the noncoding list through intersect_array.
lnc = de_redundency(transform_index.getCol('gid'))
lnc = intersect_array(N, lnc)
print("%s %d" % ("Novel lincRNA genes:", len(lnc)))

# Whatever is left of all_gids after removing the three groups above
# (presumably sub_array(a, b) returns a without the members of b -- confirm).
discard = sub_array(all_gids, C)
discard = sub_array(discard, AM)
discard = sub_array(discard, lnc)
print("%s %d" % ("Filter out noncoding genes:", len(discard)))

# Per-gene class table.
gene_class = classify(C, lnc, AM, discard)
gene_class.set_colnames('gene_id', 'class')
gene_class.write_to_file(Gene_Info)

# One GTF per group, selected through the transcript ids of its genes.
gtf.sub_gtf(gtf.get_tid(discard)).write_to_file(snc_gtf)
gtf.sub_gtf(gtf.get_tid(lnc)).write_to_file(lnc_gtf)
gtf.sub_gtf(gtf.get_tid(C)).write_to_file(coding_gtf)
gtf.sub_gtf(gtf.get_tid(AM)).write_to_file(cnc_gtf)

run_time = int(time.time() - start_time)
# exit("<text>") prints the text to stderr but terminates with status 1, which
# makes a successful run look like a failure to the calling shell/pipeline.
# Emit the same message on stderr and exit with status 0 instead.
import sys
completion_message = "Run complete: " + "%d seconds elapsed " % run_time
sys.stderr.write(completion_message + "\n")
sys.exit(0)