예제 #1
0
 def setUpClass(cls):
     """Build the fixtures stored on the class for the tests.

     Sets two light chain protein strings and a CodonAdaptationIndex
     generated from ``CodonUsage/HighlyExpressedGenes.txt``.
     """
     # Example of a crc64 collision from Sebastian Bassi using the
     # immunoglobulin lambda light chain variable region from Homo sapiens.
     # Both sequences share the same CRC64 checksum: 44CAAD88706CC153
     # The two chains have an identical first half and differ only in
     # the second half.
     shared_prefix = (
         "QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGKAPKLMIYEGSKRPSGV"
     )
     cls.str_light_chain_one = (
         shared_prefix
         + "SNRFSGSKSGNTASLTISGLQAEDEADYYCSSYAGSSTLVFGGGTKLTVL"
     )
     cls.str_light_chain_two = (
         shared_prefix
         + "SNRFSGSKSGNTASLTISGLQAEDEADYYCCSYAGSSTWVFGGGTKLTVL"
     )

     # Build the index from the highly expressed genes file and only
     # publish it on the class once generation has succeeded.
     highly_expressed = os.path.join("CodonUsage", "HighlyExpressedGenes.txt")
     codon_index = CodonAdaptationIndex()
     codon_index.generate_index(highly_expressed)
     cls.X = codon_index
예제 #2
0
    def test_codon_usage_custom(self):
        """Test Codon Adaptation Index (CAI) using FASTA file for background.

        Extracts every single-part CDS feature from the GenBank record,
        verifies each against its annotated translation, writes them to a
        temporary FASTA file, generates a codon index from that file and
        checks the CAI of a short test gene. The temporary file is removed
        even when an assertion fails.
        """
        # We need a FASTA file of CDS sequences to count the codon usage...
        dna_fasta_filename = "fasta.tmp"
        dna_genbank_filename = "GenBank/NC_005816.gb"
        record = SeqIO.read(dna_genbank_filename, "genbank")
        records = []
        # NOTE(review): `.position` and `feature.strand` may be deprecated in
        # recent Biopython releases - confirm against the targeted version.
        for feature in record.features:
            if feature.type == "CDS" and len(feature.location.parts) == 1:
                start = feature.location.start.position
                end = feature.location.end.position
                table = int(feature.qualifiers["transl_table"][0])
                if feature.strand == -1:
                    seq = record.seq[start:end].reverse_complement()
                else:
                    seq = record.seq[start:end]
                # Double check we have the CDS sequence expected
                # TODO - Use any cds_start option if/when added to deal with the met
                # The first codon is skipped and "M" is prepended instead of
                # translating it; the annotated translation lacks the stop
                # symbol, hence the appended "*".
                a = "M" + str(seq[3:].translate(table))
                b = feature.qualifiers["translation"][0] + "*"
                self.assertEqual(a, b, "%r vs %r" % (a, b))
                records.append(
                    SeqRecord(
                        seq,
                        id=feature.qualifiers["protein_id"][0],
                        description=feature.qualifiers["product"][0],
                    )
                )

        try:
            with open(dna_fasta_filename, "w") as handle:
                SeqIO.write(records, handle, "fasta")

            CAI = CodonAdaptationIndex()
            # Note - this needs a FASTA file containing non-ambiguous DNA
            # coding sequences - which should each be a whole number of codons.
            CAI.generate_index(dna_fasta_filename)
            # Now check codon usage index (CAI) using this species
            self.assertEqual(
                record.annotations["source"],
                "Yersinia pestis biovar Microtus str. 91001",
            )
            value = CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG")
            self.assertAlmostEqual(value, 0.67213, places=5)
        finally:
            # Clean up on every exit path so a failing assertion does not
            # leave the temporary file behind in the working directory.
            if os.path.exists(dna_fasta_filename):
                os.remove(dna_fasta_filename)
예제 #3
0
def calc_cai(sequence, genbank, cai_freqs=CAI_FREQS):
    """Return the CAI of the concatenated CDS features of a genome.

    Each codon's weight is its frequency in ``cai_freqs`` divided by the
    highest frequency among its synonymous codons. Every feature of
    ``genbank`` whose type is "cds" (case-insensitive) is extracted from
    ``sequence``, upper-cased, has "U" replaced by "T", is passed through
    ``proc_sequence`` and appended to one string, which is then scored.
    """
    # Relative weight of every codon within its synonymous family.
    weights = {}
    for synonymous in SynonymousCodons.values():
        family = list(synonymous)
        freqs = np.array([cai_freqs[codon] for codon in family])
        scaled = freqs / max(freqs)
        weights.update(zip(family, scaled))

    scorer = CodonAdaptationIndex()
    scorer.set_cai_index(weights)

    # Gather all CDS features, then join their processed sequences.
    coding_features = [
        feature for feature in genbank.features
        if feature.type.lower() == "cds"
    ]
    coding_seq = ""
    for feature in coding_features:
        raw = str(feature.extract(sequence))
        coding_seq += proc_sequence(raw.upper().replace("U", "T"))

    return scorer.cai_for_gene(coding_seq)
예제 #4
0
# Script fragment written with Python 2 print statements.
# Cross-check quick_FASTA_reader against SeqIO.parse on the same FASTA file.
# NOTE(review): dna_fasta_filename is used here before the assignment further
# down, so it is presumably set earlier in the file - confirm.
tuple_records = quick_FASTA_reader(dna_fasta_filename)
assert len(tuple_records)==3
# NOTE(review): the handle returned by open() is never closed explicitly.
seq_records = list(SeqIO.parse(open(dna_fasta_filename),"fasta"))
assert len(seq_records)==3
for tuple_record, seq_record in zip(tuple_records, seq_records) :
    # Each quick reader entry must equal (description, sequence as a string).
    assert tuple_record == (seq_record.description, seq_record.seq.tostring())
    print "%s has GC%% of %0.1f" % (seq_record.name, GC(seq_record.seq))

##############
# CodonUsage #
##############

# Print a blank line followed by the section title.
print
print "Codon Adaption Index (CAI)"
CAI = CodonAdaptationIndex()
# Note - this needs a whole number of codons, and a DNA seq AS A STRING.
print "Example CAI %0.5f using E. coli (default)" \
      % CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG")

# We need a FASTA file of CDS sequences to count the codon usage...
dna_fasta_filename = "fasta.tmp"
dna_genbank_filename = "GenBank/NC_005816.gb"
# NOTE(review): this open() handle is likewise never closed explicitly.
record = SeqIO.read(open(dna_genbank_filename), "genbank")
records = []
for feature in record.features :
    # Only consider CDS features that have no sub-features.
    if feature.type == "CDS" \
    and not feature.sub_features :
        # Location bounds and the translation table number from the
        # feature's "transl_table" qualifier.
        start = feature.location.start.position
        end = feature.location.end.position
        table = int(feature.qualifiers["transl_table"][0])
예제 #5
0
 def test_codon_usage_ecoli(self):
     """Test Codon Adaptation Index (CAI) using default E. coli data."""
     CAI = CodonAdaptationIndex()
     value = CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG")
     # Compare numerically to five decimal places rather than via a
     # formatted string, so a failure reports the actual value and delta.
     self.assertAlmostEqual(value, 0.09978, places=5)
예제 #6
0
 def test_codon_usage_ecoli(self):
     """Check the CAI of a short gene against the default E. coli index."""
     # A default-constructed index is used; no custom index is generated.
     default_index = CodonAdaptationIndex()
     gene = "ATGCGTATCGATCGCGATACGATTAGGCGGATG"
     self.assertAlmostEqual(
         default_index.cai_for_gene(gene), 0.09978, places=5
     )