def setUpClass(cls):
    """Create the fixtures shared by the tests of this class.

    Stores two sequence strings that differ at two positions, plus a
    ``CodonAdaptationIndex`` whose index has been generated from the
    ``CodonUsage/HighlyExpressedGenes.txt`` file, as ``cls.X``.
    """
    # CRC64 collision example contributed by Sebastian Bassi, taken from the
    # immunoglobulin lambda light chain variable region of Homo sapiens.
    # Both sequences below give the same CRC64 checksum: 44CAAD88706CC153
    cls.str_light_chain_one = (
        "QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGKAPKLMIYEGSKRPSGV"
        "SNRFSGSKSGNTASLTISGLQAEDEADYYCSSYAGSSTLVFGGGTKLTVL"
    )
    cls.str_light_chain_two = (
        "QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGKAPKLMIYEGSKRPSGV"
        "SNRFSGSKSGNTASLTISGLQAEDEADYYCCSYAGSSTWVFGGGTKLTVL"
    )

    # Build the codon adaptation index once so every test can reuse it.
    index_path = os.path.join("CodonUsage", "HighlyExpressedGenes.txt")
    codon_index = CodonAdaptationIndex()
    codon_index.generate_index(index_path)
    cls.X = codon_index
def test_codon_usage_custom(self):
    """Test Codon Adaptation Index (CAI) using FASTA file for background.

    Extracts every single-part CDS feature from the NC_005816 GenBank
    record, writes them to a temporary FASTA file, generates a codon usage
    index from that file and checks the CAI of a short example gene.
    The temporary file is removed even when the test fails.
    """
    # We need a FASTA file of CDS sequences to count the codon usage...
    dna_fasta_filename = "fasta.tmp"
    dna_genbank_filename = "GenBank/NC_005816.gb"
    record = SeqIO.read(dna_genbank_filename, "genbank")
    records = []
    for feature in record.features:
        # Only CDS features whose location is a single part are used.
        if feature.type == "CDS" and len(feature.location.parts) == 1:
            start = feature.location.start.position
            end = feature.location.end.position
            table = int(feature.qualifiers["transl_table"][0])
            if feature.strand == -1:
                seq = record.seq[start:end].reverse_complement()
            else:
                seq = record.seq[start:end]
            # Double check we have the CDS sequence expected
            # TODO - Use any cds_start option if/when added to deal with the met
            a = "M" + str(seq[3:].translate(table))
            b = feature.qualifiers["translation"][0] + "*"
            self.assertEqual(a, b, "%r vs %r" % (a, b))
            records.append(
                SeqRecord(
                    seq,
                    id=feature.qualifiers["protein_id"][0],
                    description=feature.qualifiers["product"][0],
                )
            )

    try:
        with open(dna_fasta_filename, "w") as handle:
            SeqIO.write(records, handle, "fasta")

        CAI = CodonAdaptationIndex()
        # Note - this needs a FASTA file which containing non-ambiguous DNA coding
        # sequences - which should each be a whole number of codons.
        CAI.generate_index(dna_fasta_filename)
        # Now check codon usage index (CAI) using this species
        self.assertEqual(
            record.annotations["source"], "Yersinia pestis biovar Microtus str. 91001"
        )
        value = CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG")
        self.assertAlmostEqual(value, 0.67213, places=5)
    finally:
        # Clean up on every exit path so a failing assertion or a failed
        # index build does not leave the temporary file behind. The existence
        # check avoids masking the original error if the file was never made.
        if os.path.exists(dna_fasta_filename):
            os.remove(dna_fasta_filename)
def calc_cai(sequence, genbank, cai_freqs=CAI_FREQS):
    """Return the codon adaptation index over the CDS features of a record.

    Args:
        sequence: The parent sequence handed to each feature's ``extract``.
        genbank: Object exposing a ``features`` iterable; only features whose
            ``type`` equals "cds" (case-insensitively) are used.
        cai_freqs: Mapping from codon to frequency, looked up for every codon
            listed in ``SynonymousCodons``.

    Returns:
        Whatever ``CodonAdaptationIndex.cai_for_gene`` returns for the
        concatenation of all processed CDS sequences.
    """
    # Relative weights: inside each synonymous family, divide every codon's
    # frequency by the largest frequency found in that family.
    weights = {}
    for family in SynonymousCodons.values():
        members = list(family)
        raw = np.array([cai_freqs[codon] for codon in members])
        scaled = raw / max(raw)
        weights.update(zip(members, scaled))

    index = CodonAdaptationIndex()
    index.set_cai_index(weights)

    # Gather the coding features first, then stitch their sequences together.
    # Each one is upper-cased and has "U" replaced by "T" before being passed
    # through proc_sequence (defined elsewhere; its effect is not visible here).
    coding_features = [
        feat for feat in genbank.features if feat.type.lower() == "cds"
    ]
    combined = "".join(
        [
            proc_sequence(str(feat.extract(sequence)).upper().replace("U", "T"))
            for feat in coding_features
        ]
    )

    return index.cai_for_gene(combined)
# NOTE(review): this is a fragment of a flat Python 2 script (print
# statements). dna_fasta_filename is assigned before the visible part, and the
# final loop continues past the visible part - nothing is appended to
# `records` here.

# Read the same FASTA file twice - once with quick_FASTA_reader and once with
# SeqIO.parse - and check that both give three matching records.
tuple_records = quick_FASTA_reader(dna_fasta_filename)
assert len(tuple_records)==3
seq_records = list(SeqIO.parse(open(dna_fasta_filename),"fasta"))
assert len(seq_records)==3
for tuple_record, seq_record in zip(tuple_records, seq_records) :
    # Each quick-reader entry must equal (description, sequence string).
    assert tuple_record == (seq_record.description, seq_record.seq.tostring())
    print "%s has GC%% of %0.1f" % (seq_record.name, GC(seq_record.seq))

##############
# CodonUsage #
##############

print
print "Codon Adaption Index (CAI)"
CAI = CodonAdaptationIndex()
# Note - this needs a whole number of codons, and a DNA seq AS A STRING.
print "Example CAI %0.5f using E. coli (default)" \
      % CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG")

# We need a FASTA file of CDS sequences to count the codon usage...
dna_fasta_filename = "fasta.tmp"
dna_genbank_filename = "GenBank/NC_005816.gb"
record = SeqIO.read(open(dna_genbank_filename), "genbank")
records = []
for feature in record.features :
    # Only CDS features with no sub-features are considered.
    if feature.type == "CDS" \
    and not feature.sub_features :
        start = feature.location.start.position
        end = feature.location.end.position
        # Translation table number taken from the feature's qualifiers.
        table = int(feature.qualifiers["transl_table"][0])
def test_codon_usage_ecoli(self):
    """Test Codon Adaptation Index (CAI) using default E. coli data."""
    CAI = CodonAdaptationIndex()
    value = CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG")
    # Compare numerically to five decimal places instead of through a
    # formatted string, so a failure reports the actual value and difference.
    self.assertAlmostEqual(value, 0.09978, places=5)
def test_codon_usage_ecoli(self):
    """Check the CAI of a short gene against the default E. coli data."""
    gene = "ATGCGTATCGATCGCGATACGATTAGGCGGATG"
    expected_cai = 0.09978
    default_index = CodonAdaptationIndex()
    # Agreement is required to five decimal places.
    self.assertAlmostEqual(
        default_index.cai_for_gene(gene), expected_cai, places=5
    )