Beispiel #1
0
def test_cai():
    """Check CAI scores of short sequences against an "AAC"-only reference."""
    # The reference may be given as raw sequences, as RSCU values, or as
    # precomputed weights; all three forms must give the same score.
    from_reference = CAI("AAC", reference=["AAC"])
    from_rscus = CAI("AAC", RSCUs=RSCU(["AAC"]))
    from_weights = CAI(
        "AAC", weights=relative_adaptiveness(sequences=["AAC"]))
    assert from_reference == from_rscus
    assert from_rscus == from_weights
    assert from_weights == 1.0

    # Sequences made only of "AAT" score 0.5, whatever their length.
    for query in ("AAT", "AATAAT", "AAT" * 100):
        assert CAI(query, reference=["AAC"]) == 0.5
Beispiel #2
0
    def calc_CAI(self, dataa):
        """Compute a CAI value for every gene and insert it into the database.

        Codon weights are derived from all of this object's ORFs taken
        together; each ORF is then scored against those weights and one row
        per gene is inserted into ``geneExpressionEstimations``.

        Args:
            dataa: Object providing ``cursor()`` and ``commit()``; presumably
                a DB-API connection -- confirm against the caller.
        """
        self.__geneCount = len(self.__bacteriaORF)
        gene_indices = range(self.__geneCount)

        # Reference set for the weights: every ORF held by this object.
        reference_orfs = [
            self.get_ORF()[idx].get_ORF() for idx in gene_indices
        ]
        weights = relative_adaptiveness(reference_orfs)

        cursor = dataa.cursor()
        for idx in gene_indices:
            cai_value = CAI(
                str(self.get_ORF()[idx].get_ORF()), weights=weights)

            # The gene ID is read from the UTR5 entry at the same index as
            # the ORF being scored.
            row_values = (
                self.__bacteriaID,
                self.get_UTR5()[idx].get_geneID(),
                cai_value,
            )
            sql = (
                'INSERT INTO geneExpressionEstimations(bacteriaID, geneID, CAI) VALUES ('
                + ' ,'.join(map(str, row_values))
                + ')'
            )
            cursor.execute(sql)

            # Each inserted row is committed individually.
            dataa.commit()
from Bio import SeqIO
from CAI import CAI, RSCU, relative_adaptiveness

# FASTA file holding the reference genes; change this single constant
# depending on the organism being analysed.
FASTA_PATH = "RhodosporidiumToruloidesRPGenes.fasta"

# Parse the file once and keep (record id, lower-cased sequence) pairs so that
# both passes below work from the same data without re-reading the file.
records = [
    (seq_record.id, str(seq_record.seq.lower()))
    for seq_record in SeqIO.parse(FASTA_PATH, "fasta")
]

# First pass: collect the sequences whose length is a multiple of three and
# print the id of every record that is skipped.
sequences = []
for record_id, dnaSeq in records:
    if len(dnaSeq) % 3 != 0:
        print(record_id)
    else:
        sequences.append(dnaSeq)

weights = relative_adaptiveness(sequences=sequences)
print(weights)

# Second pass: print the CAI of each usable sequence against the weights
# computed above. Skipped record ids are printed again, in file order, so the
# output matches a fresh pass over the file.
for record_id, dnaSeq in records:
    if len(dnaSeq) % 3 != 0:
        print(record_id)
    else:
        print(CAI(dnaSeq, weights=weights))
def test_alternate_genetic_code():
    """Check the weights computed from ["AAC"] under genetic code 10.

    The expected table holds every three-letter codon over "ACGT" except
    TAA and TAG. All weights are 1.0 apart from AAT.
    """
    omitted = ("TAA", "TAG")
    expected = {}
    for first in "ACGT":
        for second in "ACGT":
            for third in "ACGT":
                codon = first + second + third
                if codon not in omitted:
                    expected[codon] = 1.0

    # TGA stays in the table: this is not a stop codon in genetic code 10.
    assert "TGA" in expected

    # AAT is the only codon whose weight differs from 1.0.
    expected["AAT"] = (0.5 / (0.5 * (1 + 0.5))) / (1 / (0.5 * (1 + 0.5)))

    observed = relative_adaptiveness(sequences=["AAC"], genetic_code=10)
    assert observed == expected
def test_bad_args():
    """Verify that invalid argument combinations raise ``TypeError``."""
    # Each entry is a deferred call so that everything, including argument
    # construction, runs inside the ``pytest.raises`` context.
    bad_calls = (
        # no arguments at all
        lambda: relative_adaptiveness(),
        # both ``sequences`` and ``RSCUs`` supplied at once
        lambda: relative_adaptiveness(sequences=["AAC"], RSCUs=RSCU(["AAC"])),
    )
    for bad_call in bad_calls:
        with pytest.raises(TypeError):
            bad_call()
def test_weights():
    """Check the weights computed from the single reference sequence "AAC".

    The expected table holds every three-letter codon over "ACGT" except
    TAA, TAG and TGA. All weights are 1.0 apart from AAT.
    """
    omitted = ("TAA", "TAG", "TGA")
    expected = {}
    for first in "ACGT":
        for second in "ACGT":
            for third in "ACGT":
                codon = first + second + third
                if codon not in omitted:
                    expected[codon] = 1.0

    # AAT is the only codon whose weight differs from 1.0.
    expected["AAT"] = (0.5 / (0.5 * (1 + 0.5))) / (1 / (0.5 * (1 + 0.5)))

    observed = relative_adaptiveness(sequences=["AAC"])
    assert observed == expected
def test_arg_equivalence():
    """Weights from reference sequences must equal weights from an RSCU dict."""
    weights_from_sequences = relative_adaptiveness(sequences=["AAC"])
    weights_from_rscus = relative_adaptiveness(RSCUs=RSCU(["AAC"]))
    assert weights_from_sequences == weights_from_rscus
Beispiel #8
0
import argparse
from CAI import relative_adaptiveness
import os
import json
from Bio import SeqIO

# Command line: a single positional argument naming the reference FASTA file.
parser = argparse.ArgumentParser()
parser.add_argument(
    "ref_fasta",
    help="fasta file containing reference sequences for given species",
)
args = parser.parse_args()

# The output file is written to the current directory and named after the
# input file, with its extension replaced by ".wts".
input_name = os.path.basename(args.ref_fasta)
outfile = os.path.splitext(input_name)[0] + ".wts"

# Keep only records whose length is a multiple of three (bit of a hack).
sequence = []
for record in SeqIO.parse(args.ref_fasta, "fasta"):
    if len(record) % 3 == 0:
        sequence.append(str(record.seq))

# Compute the weights and store them as JSON.
wts_dict = relative_adaptiveness(sequences=sequence)
with open(outfile, 'w+') as out:
    json.dump(wts_dict, out)
Beispiel #9
0
def test_alternate_genetic_code():
    """Check the weights computed from ["AAC"] under genetic code 10.

    Every three-letter codon over "ACGT" other than TAA and TAG is expected,
    each with weight 1.0 except AAT.
    """
    bases = 'ACGT'
    all_codons = [x + y + z for x in bases for y in bases for z in bases]

    # 'TGA' is deliberately not excluded here: this is not a stop codon in
    # genetic code 10.
    expected = {
        codon: 1.0 for codon in all_codons if codon not in ('TAA', 'TAG')
    }
    expected['AAT'] = (0.5 / (0.5 * (1 + 0.5))) / (1 / (0.5 * (1 + 0.5)))

    result = relative_adaptiveness(sequences=["AAC"], genetic_code=10)
    assert result == expected
Beispiel #10
0
def test_weights():
    """Check the weights computed from the single reference sequence "AAC".

    Every three-letter codon over "ACGT" other than TAA, TAG and TGA is
    expected, each with weight 1.0 except AAT.
    """
    bases = 'ACGT'
    all_codons = [x + y + z for x in bases for y in bases for z in bases]

    expected = {
        codon: 1.0
        for codon in all_codons
        if codon not in ('TAA', 'TAG', 'TGA')
    }
    expected['AAT'] = (0.5 / (0.5 * (1 + 0.5))) / (1 / (0.5 * (1 + 0.5)))

    result = relative_adaptiveness(sequences=["AAC"])
    assert result == expected