def RNAmetasource2source(parameterlist):
    """
    Build the gene -> transcript dictionary used as the RNA source.

    Input:
        parameterlist[0][0]: path of the meta source file
        parameterlist[1][0]: file type, 'gtf' (read through gtffile.gtfFile)
                             or 'pck' (pickled gene -> transcript dict)
        parameterlist[1][1]: 'no_novel_transcript' disables the generation of
                             novel skipped-exon transcripts; any other value
                             enables it
        parameterlist[1][2]: number of genes to sample (0 means all genes);
                             only used when the gene list is empty
        parameterlist[1][3]: explicit list of genes; when non-empty it is used
                             as is and no sampling is done
    Output:
        Gene Dict: Transcript Dict: chromosome, strand, startbase, cigar, txsize
    Raises:
        ValueError: if the file type is neither 'gtf' nor 'pck'
    """
    # empirical parameter: percentage of genes that get a novel transcript
    genewithnovelskippedexonpct = 50
    # a gene gets at most this many attempts at producing an unseen transcript
    maxtrys = 10

    metasourcefilename = parameterlist[0][0]
    filetype = parameterlist[1][0]
    novel_skipped_exon_flg = parameterlist[1][1] != 'no_novel_transcript'
    numgenes = int(parameterlist[1][2])
    genelist = parameterlist[1][3]

    if filetype == 'gtf':
        metasource = gtffile.gtfFile(metasourcefilename)
        genetranscriptdict = metasource.getgenetranscriptdict()
    elif filetype == 'pck':
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        # Binary mode + context manager: the handle is closed deterministically
        # and binary pickle protocols are read correctly on every platform.
        with open(metasourcefilename, 'rb') as picklefile:
            genetranscriptdict = cPickle.load(picklefile)
    else:
        # Previously this fell through to an UnboundLocalError further down.
        raise ValueError(
            "unsupported meta source file type %r (expected 'gtf' or 'pck')"
            % (filetype,))

    if len(genelist) == 0:
        if numgenes == 0:
            numgenes = len(genetranscriptdict)
        else:
            numgenes = min(numgenes, len(genetranscriptdict))
        # random.sample needs a real sequence, not a dict view
        allgenes = list(genetranscriptdict.keys())
        choosegenes = random.sample(allgenes, numgenes)
    else:
        choosegenes = genelist[0:]

    outtranscriptdict = {}
    for gene in choosegenes:
        # Shallow copy: the novel transcript is added to this gene's output
        # only, the loaded annotation itself is left untouched.
        txdict = dict(genetranscriptdict[gene])
        if novel_skipped_exon_flg and common.toss(
                genewithnovelskippedexonpct / 100.0):
            txexonslist = [txdict[tx][2] for tx in txdict]
            txnames = list(txdict.keys())
            for _ in range(maxtrys):
                tx = random.choice(txnames)
                txexons = txdict[tx][2]
                # need at least one internal exon to skip
                if len(txexons) <= 2:
                    continue
                # 0-based index of an internal exon: never the first or last
                skipped = random.randint(1, len(txexons) - 2)
                # drop exactly the exon at index `skipped`
                newtx = txexons[:skipped] + txexons[skipped + 1:]
                if newtx not in txexonslist:
                    txdict['%s_skip' % tx] = [txdict[tx][0], txdict[tx][1],
                                              newtx]
                    break

        genetxout = {}
        for tx in txdict:
            txinfo = txdict[tx]
            cigartup = common.exonlist2cigar(txinfo[2])
            genetxout[tx] = [txinfo[0], txinfo[1],
                             cigartup[0], cigartup[1], cigartup[2]]
        outtranscriptdict[gene] = genetxout
    return outtranscriptdict
# Start-up fragment: copies command line options into cfg, loads the
# annotation file and starts grouping islands per chromosome name.
# NOTE(review): the statements below are collapsed onto one line and the chunk
# starts and ends mid-structure (the guard of the first call, if any, and the
# rest of the final for-loop are not visible here) - confirm the nesting
# against the original file before editing.
# - cfg.setfdmfastpair / cfg.setfdmfullpair receive options.fdm_fast_pair /
#   options.fdm_full_pair; cfg.setfdmgenefile is called when
#   options.analysis_genefile is not the empty string (this check appears twice).
# - common.printstatus reports 'Program Started' with status code 'S'.
# - `gtffile = gtffile.gtfFile(...)` rebinds the name `gtffile` to the object
#   returned by the call, so every later `gtffile.` access goes to that object
#   and no longer to whatever `gtffile` named before.
# - The return value of gtffile.getgenetranscriptdict() is discarded here.
# - genelist holds the keys of islanddict.
# - In the loop, chrnm is island[0] formatted as a string; the first time a
#   chrnm is seen its entry is initialised to [island[3]].
#   (presumably island[0] is the chromosome and island[3] a gene/island id -
#   TODO confirm against gtfFile.getislandlist)
cfg.setfdmfastpair(options.fdm_fast_pair) if options.analysis_genefile != '': cfg.setfdmgenefile(options.analysis_genefile) if options.fdm_full_pair != '': cfg.setfdmfullpair(options.fdm_full_pair) if options.analysis_genefile != '': cfg.setfdmgenefile(options.analysis_genefile) message = 'Program Started' common.printstatus(message, 'S', common.func_name()) rootdir = cfg.root_dir gtffilename = cfg.annotation_file gtffile = gtffile.gtfFile(gtffilename, 0, cfg.annotation_file_type) gtffile.getgenetranscriptdict() geneannodivdict = gtffile.getgeneannodivdict() annochrjuncdict = gtffile.getchrjuncdict() islanddict = gtffile.getislanddict() genelist = islanddict.keys() chrlist = gtffile.getchrlist() islandlist = gtffile.getislandlist() chrgenelistdict = {} for island in islandlist: chrnm = '%s' % island[0] if chrnm not in chrgenelistdict: chrgenelistdict[chrnm] = [island[3]]
# Start-up fragment: copies command line options into cfg, loads the
# annotation file and starts grouping islands per chromosome name.
# NOTE(review): the statements below are collapsed onto one line and the chunk
# starts and ends mid-structure (the guard of the first call, if any, and the
# rest of the final for-loop are not visible here) - confirm the nesting
# against the original file before editing.
# - cfg.setfdmfastpair / cfg.setfdmfullpair receive options.fdm_fast_pair /
#   options.fdm_full_pair; cfg.setfdmgenefile is called when
#   options.analysis_genefile is not the empty string (this check appears twice).
# - common.printstatus reports 'Program Started' with status code 'S'.
# - `gtffile=gtffile.gtfFile(...)` rebinds the name `gtffile` to the object
#   returned by the call, so every later `gtffile.` access goes to that object
#   and no longer to whatever `gtffile` named before.
# - The return value of gtffile.getgenetranscriptdict() is discarded here.
# - genelist holds the keys of islanddict.
# - In the loop, chrnm is island[0] formatted as a string; the first time a
#   chrnm is seen its entry is initialised to [island[3]].
#   (presumably island[0] is the chromosome and island[3] a gene/island id -
#   TODO confirm against gtfFile.getislandlist)
cfg.setfdmfastpair(options.fdm_fast_pair) if options.analysis_genefile!='': cfg.setfdmgenefile(options.analysis_genefile) if options.fdm_full_pair!='': cfg.setfdmfullpair(options.fdm_full_pair) if options.analysis_genefile!='': cfg.setfdmgenefile(options.analysis_genefile) message='Program Started' common.printstatus(message,'S',common.func_name()) rootdir=cfg.root_dir gtffilename=cfg.annotation_file gtffile=gtffile.gtfFile(gtffilename,0,cfg.annotation_file_type) gtffile.getgenetranscriptdict() geneannodivdict=gtffile.getgeneannodivdict() annochrjuncdict=gtffile.getchrjuncdict() islanddict=gtffile.getislanddict() genelist=islanddict.keys() chrlist=gtffile.getchrlist() islandlist=gtffile.getislandlist() chrgenelistdict={} for island in islandlist: chrnm='%s'%island[0] if chrnm not in chrgenelistdict: chrgenelistdict[chrnm]=[island[3]]
def RNAmetasource2source(parameterlist):
    """
    Build the gene -> transcript dictionary used as the RNA source.

    Input:
        parameterlist[0][0]: path of the meta source file
        parameterlist[1][0]: file type, 'gtf' (read through gtffile.gtfFile)
                             or 'pck' (pickled gene -> transcript dict)
        parameterlist[1][1]: 'no_novel_transcript' disables the generation of
                             novel skipped-exon transcripts; any other value
                             enables it
        parameterlist[1][2]: number of genes to sample (0 means all genes);
                             only used when the gene list is empty
        parameterlist[1][3]: explicit list of genes; when non-empty it is used
                             as is and no sampling is done
    Output:
        Gene Dict: Transcript Dict: chromosome, strand, startbase, cigar, txsize
    Raises:
        ValueError: if the file type is neither 'gtf' nor 'pck'
    """
    # empirical parameter: percentage of genes that get a novel transcript
    genewithnovelskippedexonpct = 50
    # a gene gets at most this many attempts at producing an unseen transcript
    maxtrys = 10

    metasourcefilename = parameterlist[0][0]
    filetype = parameterlist[1][0]
    novel_skipped_exon_flg = parameterlist[1][1] != 'no_novel_transcript'
    numgenes = int(parameterlist[1][2])
    genelist = parameterlist[1][3]

    if filetype == 'gtf':
        metasource = gtffile.gtfFile(metasourcefilename)
        genetranscriptdict = metasource.getgenetranscriptdict()
    elif filetype == 'pck':
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        # Binary mode + context manager: the handle is closed deterministically
        # and binary pickle protocols are read correctly on every platform.
        with open(metasourcefilename, 'rb') as picklefile:
            genetranscriptdict = cPickle.load(picklefile)
    else:
        # Previously this fell through to an UnboundLocalError further down.
        raise ValueError(
            "unsupported meta source file type %r (expected 'gtf' or 'pck')"
            % (filetype,))

    if len(genelist) == 0:
        if numgenes == 0:
            numgenes = len(genetranscriptdict)
        else:
            numgenes = min(numgenes, len(genetranscriptdict))
        # random.sample needs a real sequence, not a dict view
        allgenes = list(genetranscriptdict.keys())
        choosegenes = random.sample(allgenes, numgenes)
    else:
        choosegenes = genelist[0:]

    outtranscriptdict = {}
    for gene in choosegenes:
        # Shallow copy: the novel transcript is added to this gene's output
        # only, the loaded annotation itself is left untouched.
        txdict = dict(genetranscriptdict[gene])
        if novel_skipped_exon_flg and common.toss(
                genewithnovelskippedexonpct / 100.0):
            txexonslist = [txdict[tx][2] for tx in txdict]
            txnames = list(txdict.keys())
            for _ in range(maxtrys):
                tx = random.choice(txnames)
                txexons = txdict[tx][2]
                # need at least one internal exon to skip
                if len(txexons) <= 2:
                    continue
                # 0-based index of an internal exon: never the first or last
                skipped = random.randint(1, len(txexons) - 2)
                # drop exactly the exon at index `skipped`
                newtx = txexons[:skipped] + txexons[skipped + 1:]
                if newtx not in txexonslist:
                    txdict['%s_skip' % tx] = [txdict[tx][0], txdict[tx][1],
                                              newtx]
                    break

        genetxout = {}
        for tx in txdict:
            txinfo = txdict[tx]
            cigartup = common.exonlist2cigar(txinfo[2])
            genetxout[tx] = [txinfo[0], txinfo[1],
                             cigartup[0], cigartup[1], cigartup[2]]
        outtranscriptdict[gene] = genetxout
    return outtranscriptdict