def test_cai():
    """Check CAI scores for an ``["AAC"]`` reference across all input forms."""
    # The reference may be supplied as raw sequences, as RSCU values or as
    # precomputed weights; all three must agree and score "AAC" as 1.0.
    from_reference = CAI("AAC", reference=["AAC"])
    from_rscus = CAI("AAC", RSCUs=RSCU(["AAC"]))
    from_weights = CAI("AAC", weights=relative_adaptiveness(sequences=["AAC"]))
    assert from_reference == from_rscus == from_weights == 1.0

    # Sequences made only of "AAT" score 0.5 against the same reference,
    # whatever their length.
    for query in ("AAT", "AATAAT", "AAT" * 100):
        assert CAI(query, reference=["AAC"]) == 0.5
def calc_CAI(self, dataa):
    """Score every ORF with CAI and insert the results into the database.

    Codon weights are derived from all ORFs returned by ``self.get_ORF()``;
    each ORF is then scored against those weights and one row per gene is
    inserted into ``geneExpressionEstimations``.

    Args:
        dataa: database connection object; only its ``cursor()`` and
            ``commit()`` methods are used here.
    """
    self.__geneCount = len(self.__bacteriaORF)
    gene_indices = range(self.__geneCount)

    # All ORFs together form the reference set for the codon weights.
    reference_orfs = [self.get_ORF()[idx].get_ORF() for idx in gene_indices]
    weights = relative_adaptiveness(reference_orfs)

    cursor = dataa.cursor()
    for idx in gene_indices:
        cai_value = CAI(str(self.get_ORF()[idx].get_ORF()), weights=weights)
        # The gene id is read from the UTR5 entry at the same index.
        # NOTE(review): values are concatenated into the SQL text; if any of
        # them can come from untrusted input, switch to the driver's
        # parameter binding.
        row_values = ' ,'.join([
            str(self.__bacteriaID),
            str(self.get_UTR5()[idx].get_geneID()),
            str(cai_value),
        ])
        sql = ('INSERT INTO geneExpressionEstimations(bacteriaID, geneID, CAI) VALUES ('
               + row_values + ')')
        cursor.execute(sql)
    # NOTE(review): committing once after all inserts; confirm per-row
    # commits are not required.
    dataa.commit()
from Bio import SeqIO
from CAI import CAI, RSCU, relative_adaptiveness

# Change this file name depending on the organism being analysed.
FASTA_FILE = "RhodosporidiumToruloidesRPGenes.fasta"


def _codon_aligned_sequences(path):
    """Yield each record's lower-cased sequence if its length is a multiple of three.

    Records whose length is not a multiple of three are skipped and their
    id is printed instead.
    """
    for record in SeqIO.parse(path, "fasta"):
        dna = str(record.seq.lower())
        if len(dna) % 3 == 0:
            yield dna
        else:
            print(record.id)


# First pass: collect the usable sequences and derive the codon weights.
sequences = list(_codon_aligned_sequences(FASTA_FILE))
weights = relative_adaptiveness(sequences=sequences)
print(weights)

# Second pass: re-read the file and print the CAI of every usable sequence.
for dna_seq in _codon_aligned_sequences(FASTA_FILE):
    print(CAI(dna_seq, weights=weights))
def test_alternate_genetic_code():
    """Check the weights derived from ``["AAC"]`` under genetic code 10."""
    bases = "ACGT"
    # TGA is not a stop codon in genetic code 10, so only TAA and TAG are
    # absent from the expected table.
    excluded = ("TAA", "TAG")
    expected = {
        first + second + third: 1.0
        for first in bases
        for second in bases
        for third in bases
        if first + second + third not in excluded
    }
    # AAT is the only codon whose expected weight differs from 1.0.
    expected["AAT"] = (0.5 / (0.5 * (1 + 0.5))) / (1 / (0.5 * (1 + 0.5)))

    assert relative_adaptiveness(sequences=["AAC"], genetic_code=10) == expected
def test_bad_args():
    """Check that invalid argument combinations raise ``TypeError``."""
    # Each entry is a thunk so that its arguments are only evaluated inside
    # the ``pytest.raises`` context.
    invalid_calls = (
        # neither sequences nor RSCUs supplied
        lambda: relative_adaptiveness(),
        # both sequences and RSCUs supplied
        lambda: relative_adaptiveness(sequences=["AAC"], RSCUs=RSCU(["AAC"])),
    )
    for call in invalid_calls:
        with pytest.raises(TypeError):
            call()
def test_weights():
    """Check the weights derived from ``["AAC"]`` with the default genetic code."""
    bases = "ACGT"
    # TAA, TAG and TGA do not appear in the expected table.
    excluded = ("TAA", "TAG", "TGA")
    expected = {
        first + second + third: 1.0
        for first in bases
        for second in bases
        for third in bases
        if first + second + third not in excluded
    }
    # AAT is the only codon whose expected weight differs from 1.0.
    expected["AAT"] = (0.5 / (0.5 * (1 + 0.5))) / (1 / (0.5 * (1 + 0.5)))

    assert relative_adaptiveness(sequences=["AAC"]) == expected
def test_arg_equivalence():
    """Check that reference sequences and an RSCU dict give the same weights."""
    weights_from_sequences = relative_adaptiveness(sequences=["AAC"])
    weights_from_rscus = relative_adaptiveness(RSCUs=RSCU(["AAC"]))
    assert weights_from_sequences == weights_from_rscus
import argparse
import json
import os

from Bio import SeqIO
from CAI import relative_adaptiveness

parser = argparse.ArgumentParser()
parser.add_argument(
    "ref_fasta",
    help="fasta file containing reference sequences for given species")
args = parser.parse_args()

# The output file is named after the input file, with its extension
# replaced by ".wts"; the directory part of the input path is dropped.
fasta_name = os.path.basename(args.ref_fasta)
stem, _extension = os.path.splitext(fasta_name)
outfile = stem + ".wts"

# Only records whose length is a multiple of three are used as reference
# sequences (bit of a hack).
sequence = []
for seq in SeqIO.parse(args.ref_fasta, "fasta"):
    if len(seq) % 3 != 0:
        continue
    sequence.append(str(seq.seq))

# Serialise the codon weights as JSON.
wts_dict = relative_adaptiveness(sequences=sequence)
with open(outfile, 'w+') as out:
    json.dump(wts_dict, out)
def test_alternate_genetic_code():
    """Check the weights derived from ``["AAC"]`` under genetic code 10."""
    # Start from every three-letter combination of the four bases at 1.0 ...
    all_codons = [a + b + c for a in 'ACGT' for b in 'ACGT' for c in 'ACGT']
    expected = dict.fromkeys(all_codons, 1.0)
    # ... then drop the two codons missing from the table. TGA stays: it is
    # not a stop codon in genetic code 10.
    for absent in ('TAA', 'TAG'):
        del expected[absent]
    # AAT is the only codon whose expected weight differs from 1.0.
    expected['AAT'] = (0.5 / (0.5 * (1 + 0.5))) / (1 / (0.5 * (1 + 0.5)))

    observed = relative_adaptiveness(sequences=["AAC"], genetic_code=10)
    assert observed == expected
def test_weights():
    """Check the weights derived from ``["AAC"]`` with the default genetic code."""
    # Start from every three-letter combination of the four bases at 1.0 ...
    all_codons = [a + b + c for a in 'ACGT' for b in 'ACGT' for c in 'ACGT']
    expected = dict.fromkeys(all_codons, 1.0)
    # ... then drop the three codons missing from the table.
    for absent in ('TAA', 'TAG', 'TGA'):
        del expected[absent]
    # AAT is the only codon whose expected weight differs from 1.0.
    expected['AAT'] = (0.5 / (0.5 * (1 + 0.5))) / (1 / (0.5 * (1 + 0.5)))

    observed = relative_adaptiveness(sequences=["AAC"])
    assert observed == expected