Example #1
0
def _novel_skipped_exon_tx(txdict, knownexonlists, maxtries=10):
    """
    Try to build one novel transcript by dropping a single internal exon.

    Input:
        txdict: Transcript Dict: [chromosome, strand, exon list]
        knownexonlists: exon lists of the transcripts that already exist
        maxtries: number of random draws before giving up
    Output:
        (new transcript id, [chromosome, strand, exon list]), or None when
        no novel exon list was found within maxtries draws
    """
    # list() gives random.choice an indexable sequence on Python 2 and 3
    txids = list(txdict.keys())
    if not txids:
        return None
    for _ in range(maxtries):
        tx = random.choice(txids)
        txexons = txdict[tx][2]
        if len(txexons) <= 2:
            # No internal exon to skip; draw again instead of reusing a
            # candidate left over from an earlier draw (or an unbound name).
            continue
        # 0-based index of an internal exon: first and last are always kept
        skipped = random.randint(1, len(txexons) - 2)
        newtx = txexons[:skipped] + txexons[skipped + 1:]
        if newtx not in knownexonlists:
            return '%s_skip' % tx, [txdict[tx][0], txdict[tx][1], newtx]
    return None


def RNAmetasource2source(parameterlist):
    """
    Build the transcript source dict from an annotation meta source.

    Input:
        parameterlist[0][0]: meta source file name
        parameterlist[1][0]: file type, 'gtf' or 'pck' (pickled dict)
        parameterlist[1][1]: 'no_novel_transcript' disables the generation
            of novel skipped-exon transcripts; any other value enables it
        parameterlist[1][2]: number of genes to sample (0 means all genes);
            only used when the gene list is empty
        parameterlist[1][3]: explicit list of genes to use (may be empty)
    Output:
        Gene Dict: Transcript Dict: chromosome, strand, startbase, cigar, txsize
    Raises:
        ValueError: if the file type is neither 'gtf' nor 'pck'
    """
    # empirical parameter
    genewithnovelskippedexonpct = 50
    metasourcefilename = parameterlist[0][0]
    filetype = parameterlist[1][0]
    novel_skipped_exon_flg = parameterlist[1][1] != 'no_novel_transcript'
    numgenes = int(parameterlist[1][2])
    genelist = parameterlist[1][3]

    if filetype == 'gtf':
        metasource = gtffile.gtfFile(metasourcefilename)
        genetranscriptdict = metasource.getgenetranscriptdict()
    elif filetype == 'pck':
        # NOTE: unpickling can execute arbitrary code; only load trusted files
        with open(metasourcefilename) as metasourcefile:
            genetranscriptdict = cPickle.load(metasourcefile)
    else:
        # Fail here rather than with a NameError further down
        raise ValueError(
            'unsupported meta source file type: %r' % (filetype,))

    if len(genelist) == 0:
        if numgenes == 0:
            numgenes = len(genetranscriptdict)
        else:
            numgenes = min(numgenes, len(genetranscriptdict))
        # random.sample needs a sequence, not a dict view
        choosegenes = random.sample(list(genetranscriptdict.keys()), numgenes)
    else:
        choosegenes = genelist[0:]

    outtranscriptdict = {}
    for gene in choosegenes:
        txdict = genetranscriptdict[gene]
        if novel_skipped_exon_flg and common.toss(
                genewithnovelskippedexonpct / 100.0):
            txexonslist = [txdict[tx][2] for tx in txdict]
            noveltx = _novel_skipped_exon_tx(txdict, txexonslist)
            if noveltx is not None:
                # The novel transcript is emitted alongside the annotated ones
                txdict[noveltx[0]] = noveltx[1]
        outtranscriptdict[gene] = {}
        for tx in txdict:
            cigartup = common.exonlist2cigar(txdict[tx][2])
            outtranscriptdict[gene][tx] = [
                txdict[tx][0], txdict[tx][1],
                cigartup[0], cigartup[1], cigartup[2]
            ]
    return outtranscriptdict
Example #2
0
        # Fragment of a larger routine (its header is not visible here):
        # records the fast-pair option on cfg and, when a gene file was
        # given, records that as well.
        cfg.setfdmfastpair(options.fdm_fast_pair)
        if options.analysis_genefile != '':
            cfg.setfdmgenefile(options.analysis_genefile)

    # Same handling for the full-pair option; an empty string means "not set".
    if options.fdm_full_pair != '':
        cfg.setfdmfullpair(options.fdm_full_pair)
        if options.analysis_genefile != '':
            cfg.setfdmgenefile(options.analysis_genefile)

    message = 'Program Started'
    common.printstatus(message, 'S', common.func_name())

    rootdir = cfg.root_dir
    gtffilename = cfg.annotation_file

    # NOTE(review): this rebinds the name `gtffile` to the object returned by
    # gtffile.gtfFile(...). If this code lives inside a function and `gtffile`
    # is an imported module, the right-hand side raises UnboundLocalError --
    # confirm the enclosing scope, or use a different variable name.
    gtffile = gtffile.gtfFile(gtffilename, 0, cfg.annotation_file_type)
    # Return value is discarded; presumably called for its side effect on the
    # gtfFile object -- TODO confirm.
    gtffile.getgenetranscriptdict()

    geneannodivdict = gtffile.getgeneannodivdict()

    annochrjuncdict = gtffile.getchrjuncdict()

    islanddict = gtffile.getislanddict()
    # The keys of islanddict are used as the gene list.
    genelist = islanddict.keys()
    chrlist = gtffile.getchrlist()
    islandlist = gtffile.getislandlist()
    # Maps str(island[0]) to a list seeded with island[3]; presumably
    # chromosome name -> gene ids, judging by the variable names -- verify.
    chrgenelistdict = {}
    for island in islandlist:
        chrnm = '%s' % island[0]
        if chrnm not in chrgenelistdict:
            chrgenelistdict[chrnm] = [island[3]]
Example #3
0
        # Fragment of a larger routine (its header is not visible here):
        # records the fast-pair option on cfg and, when a gene file was
        # given, records that as well.
        cfg.setfdmfastpair(options.fdm_fast_pair)     
        if options.analysis_genefile!='':   
            cfg.setfdmgenefile(options.analysis_genefile)
 
    # Same handling for the full-pair option; an empty string means "not set".
    if options.fdm_full_pair!='':
        cfg.setfdmfullpair(options.fdm_full_pair)     
        if options.analysis_genefile!='':   
            cfg.setfdmgenefile(options.analysis_genefile)
 
    message='Program Started'
    common.printstatus(message,'S',common.func_name())
    
    rootdir=cfg.root_dir
    gtffilename=cfg.annotation_file  

    # NOTE(review): this rebinds the name `gtffile` to the object returned by
    # gtffile.gtfFile(...). If this code lives inside a function and `gtffile`
    # is an imported module, the right-hand side raises UnboundLocalError --
    # confirm the enclosing scope, or use a different variable name.
    gtffile=gtffile.gtfFile(gtffilename,0,cfg.annotation_file_type)
    # Return value is discarded; presumably called for its side effect on the
    # gtfFile object -- TODO confirm.
    gtffile.getgenetranscriptdict()
   
    geneannodivdict=gtffile.getgeneannodivdict()   
        
    annochrjuncdict=gtffile.getchrjuncdict()
    
    islanddict=gtffile.getislanddict()
    # The keys of islanddict are used as the gene list.
    genelist=islanddict.keys()
    chrlist=gtffile.getchrlist()
    islandlist=gtffile.getislandlist()
    # Maps str(island[0]) to a list seeded with island[3]; presumably
    # chromosome name -> gene ids, judging by the variable names -- verify.
    chrgenelistdict={}
    for island in islandlist:
        chrnm='%s'%island[0]
        if chrnm not in chrgenelistdict:
            chrgenelistdict[chrnm]=[island[3]]
Example #4
0
def _novel_skipped_exon_tx(txdict, knownexonlists, maxtries=10):
    """
    Try to build one novel transcript by dropping a single internal exon.

    Input:
        txdict: Transcript Dict: [chromosome, strand, exon list]
        knownexonlists: exon lists of the transcripts that already exist
        maxtries: number of random draws before giving up
    Output:
        (new transcript id, [chromosome, strand, exon list]), or None when
        no novel exon list was found within maxtries draws
    """
    # list() gives random.choice an indexable sequence on Python 2 and 3
    txids = list(txdict.keys())
    if not txids:
        return None
    for _ in range(maxtries):
        tx = random.choice(txids)
        txexons = txdict[tx][2]
        if len(txexons) <= 2:
            # No internal exon to skip; draw again instead of reusing a
            # candidate left over from an earlier draw (or an unbound name).
            continue
        # 0-based index of an internal exon: first and last are always kept
        skipped = random.randint(1, len(txexons) - 2)
        newtx = txexons[:skipped] + txexons[skipped + 1:]
        if newtx not in knownexonlists:
            return '%s_skip' % tx, [txdict[tx][0], txdict[tx][1], newtx]
    return None


def RNAmetasource2source(parameterlist):
    """
    Build the transcript source dict from an annotation meta source.

    Input:
        parameterlist[0][0]: meta source file name
        parameterlist[1][0]: file type, 'gtf' or 'pck' (pickled dict)
        parameterlist[1][1]: 'no_novel_transcript' disables the generation
            of novel skipped-exon transcripts; any other value enables it
        parameterlist[1][2]: number of genes to sample (0 means all genes);
            only used when the gene list is empty
        parameterlist[1][3]: explicit list of genes to use (may be empty)
    Output:
        Gene Dict: Transcript Dict: chromosome, strand, startbase, cigar, txsize
    Raises:
        ValueError: if the file type is neither 'gtf' nor 'pck'
    """
    # empirical parameter
    genewithnovelskippedexonpct = 50
    metasourcefilename = parameterlist[0][0]
    filetype = parameterlist[1][0]
    novel_skipped_exon_flg = parameterlist[1][1] != 'no_novel_transcript'
    numgenes = int(parameterlist[1][2])
    genelist = parameterlist[1][3]

    if filetype == 'gtf':
        metasource = gtffile.gtfFile(metasourcefilename)
        genetranscriptdict = metasource.getgenetranscriptdict()
    elif filetype == 'pck':
        # NOTE: unpickling can execute arbitrary code; only load trusted files
        with open(metasourcefilename) as metasourcefile:
            genetranscriptdict = cPickle.load(metasourcefile)
    else:
        # Fail here rather than with a NameError further down
        raise ValueError(
            'unsupported meta source file type: %r' % (filetype,))

    if len(genelist) == 0:
        if numgenes == 0:
            numgenes = len(genetranscriptdict)
        else:
            numgenes = min(numgenes, len(genetranscriptdict))
        # random.sample needs a sequence, not a dict view
        choosegenes = random.sample(list(genetranscriptdict.keys()), numgenes)
    else:
        choosegenes = genelist[0:]

    outtranscriptdict = {}
    for gene in choosegenes:
        txdict = genetranscriptdict[gene]
        if novel_skipped_exon_flg and common.toss(
                genewithnovelskippedexonpct / 100.0):
            txexonslist = [txdict[tx][2] for tx in txdict]
            noveltx = _novel_skipped_exon_tx(txdict, txexonslist)
            if noveltx is not None:
                # The novel transcript is emitted alongside the annotated ones
                txdict[noveltx[0]] = noveltx[1]
        outtranscriptdict[gene] = {}
        for tx in txdict:
            cigartup = common.exonlist2cigar(txdict[tx][2])
            outtranscriptdict[gene][tx] = [
                txdict[tx][0], txdict[tx][1],
                cigartup[0], cigartup[1], cigartup[2]
            ]
    return outtranscriptdict