コード例 #1
0
def getGeneData():
    """Collect sequence windows around gene starts from every Genbank file.

    Each file in ``bacteriaGB/Genbank/`` is read with ``readGenesIn``; its
    gene locations come from ``gb.FindKeywordLocs`` and ``gb.GeneLocs``.
    Locations rejected by ``preCheck`` are skipped.  For the rest, the slice
    ``dna[stNum-30:stNum+23]`` is stored in one of two lists.

    Returns:
        A ``(startList, nonStartList)`` tuple.  ``startList`` receives the
        windows of locations whose second element is falsy (the original
        notes describe it as the complement flag) and whose three characters
        at ``stNum`` equal ``'atg'``; every other accepted location goes to
        ``nonStartList``.
    """
    genbank_dir = 'bacteriaGB/Genbank/'

    startList = []
    nonStartList = []

    for entry in os.listdir(genbank_dir):
        data, dna = readGenesIn(genbank_dir + entry)

        # Keyword locations first; the gene locations are derived from them.
        keyword_locs = gb.FindKeywordLocs(data)

        for gene_loc in gb.GeneLocs(data, keyword_locs):
            is_valid, stNum, _end = preCheck(gene_loc, dna)
            if not is_valid:
                continue

            is_complement = gene_loc[1]
            first_codon = dna[stNum:stNum + 3]

            # NOTE(review): if stNum < 30 the lower bound is negative and the
            # slice wraps around -- confirm preCheck rules that case out.
            window = dna[stNum - 30:stNum + 23]

            if is_complement or first_codon != 'atg':
                nonStartList.append(window)
            else:
                startList.append(window)

    return startList, nonStartList
コード例 #2
0
ファイル: brutecompare.py プロジェクト: lpincus/cds-hw12
def DumpSequences(gbname):
    """Return the translation of every keyword location in a Genbank file.

    The file is read with ``gb.ReadFile`` and its keyword locations come
    from ``gb.FindKeywordLocs``.  ``gb.Translation`` is then called once per
    location, in order.

    Args:
        gbname: Name of the Genbank file handed to ``gb.ReadFile``.

    Returns:
        A list holding the ``gb.Translation`` result for each location.
    """
    contents = gb.ReadFile(gbname)
    return [
        gb.Translation(contents, location)
        for location in gb.FindKeywordLocs(contents)
    ]
コード例 #3
0
def FileReadConvert(fname, codkeys):
    """Read a Genbank file and encode each gene's coding DNA as codon indices.

    Args:
        fname: Name of a Genbank file, passed to ``genbank.ReadGenbank``.
        codkeys: Iterable of codons; the original notes describe it as
            ``codons.keys()`` with ``codons`` from ``genbank.Codons()``.
            A codon's index is its position in iteration order, so a list,
            a tuple or a dict key view are all accepted.

    Returns:
        A list with one entry per gene location.  Each entry is a list of
        ints: the index in ``codkeys`` of every successive three-character
        slice of that gene's coding DNA.

    Raises:
        ValueError: If a slice of the coding DNA is not present in
            ``codkeys`` (the same exception type ``list.index`` raised).
    """
    # read in the DNA sequences for the genes
    data = genbank.ReadGenbank(fname)
    dna = genbank.ParseDNA(data)
    klocs = genbank.FindKeywordLocs(data)
    glocs = genbank.GeneLocs(data, klocs)

    # Build the codon -> position table once.  This replaces a linear
    # ``codkeys.index`` scan per codon and also works when ``codkeys`` is a
    # dict key view, which has no ``index`` method on Python 3.
    # ``setdefault`` keeps the first position of a repeated codon, matching
    # what ``index`` returned.
    # Assumes codons are hashable (e.g. str) -- TODO confirm against
    # genbank.GetCodingDNA's return type.
    codon_index = {}
    for position, codon in enumerate(codkeys):
        codon_index.setdefault(codon, position)

    codons = []
    for gloc in glocs:
        # extract DNA for this sequence
        cdna = genbank.GetCodingDNA(dna, gloc)
        # convert to codons
        try:
            encoded = [codon_index[cdna[j:j + 3]]
                       for j in range(0, len(cdna), 3)]
        except KeyError as err:
            # Keep the exception type callers of the old code would see.
            raise ValueError('%r is not in codkeys' % (err.args[0],))
        codons.append(encoded)
    return codons
コード例 #4
0
def GetData(fn):
    """Load a Genbank file and return its gene locations and parsed DNA.

    Args:
        fn: Name of the Genbank file handed to ``genbank.ReadGenbank``.

    Returns:
        A ``(genes, dna)`` tuple: the result of ``genbank.GeneLocs`` and the
        result of ``genbank.ParseDNA`` for the same file contents.
    """
    record = genbank.ReadGenbank(fn)
    sequence = genbank.ParseDNA(record)
    keyword_locs = genbank.FindKeywordLocs(record)
    return genbank.GeneLocs(record, keyword_locs), sequence