transform_index.left_join(1, tid_gid, 1) gid_cnctype = transform_index.get_col('gid', 'index') gid_cnctype = gid_cnctype.de_redundency() gid_cnctype = gid_cnctype.key_by('gid', 'index') coding = gid_cnctype.eget('union', '_index=coding') noncoding = gid_cnctype.eget('union', '_index=noncoding') ambiguity = gid_cnctype.eget('union', '_index=coding,noncoding', '_index=noncoding,coding') exon_number = gtf.getExon() transform_index.left_join(1, exon_number, 1) transform_index = transform_index.get_row_by_func(check_lncRNA) all_gids = de_redundency(tid_gid.getCol('gid')) C = coding.getCol('gid') print "Novel coding genes:", len(C) N = noncoding.getCol('gid') AM = ambiguity.getCol('gid') print "Ambiguous genes:", len(AM) lnc = de_redundency(transform_index.getCol('gid')) lnc = intersect_array(N, lnc) print "Novel lincRNA genes:", len(lnc) discard = sub_array(all_gids, C) discard = sub_array(discard, AM)
gid_cnctype=transform_index.get_col('gid','index') gid_cnctype=gid_cnctype.de_redundency() gid_cnctype=gid_cnctype.key_by('gid','index') coding=gid_cnctype.eget('union','_index=coding') noncoding=gid_cnctype.eget('union','_index=noncoding') ambiguity=gid_cnctype.eget('union','_index=coding,noncoding','_index=noncoding,coding') exon_number=gtf.getExon() transform_index.left_join(1,exon_number,1) transform_index=transform_index.get_row_by_func(check_lncRNA) all_gids=de_redundency(tid_gid.getCol('gid')) C=coding.getCol('gid') print "Novel coding genes:",len(C) N=noncoding.getCol('gid') AM=ambiguity.getCol('gid') print "Ambiguous genes:",len(AM) lnc=de_redundency(transform_index.getCol('gid')) lnc=intersect_array(N,lnc) print "Novel lincRNA genes:",len(lnc) discard=sub_array(all_gids,C)