예제 #1
0
 def setUpClass(cls):
     """Create the class-level fixtures shared by the tests.

     Stores two protein sequences that differ at two positions yet share
     one CRC64 checksum, and a CodonAdaptationIndex whose index has been
     generated from the highly expressed genes file.
     """
     # Example of crc64 collision from Sebastian Bassi using the
     # immunoglobulin lambda light chain variable region from Homo sapiens.
     # Both sequences share the same CRC64 checksum: 44CAAD88706CC153
     shared_prefix = "QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGKAPKLMIYEGSKRPSGV"
     cls.str_light_chain_one = (
         shared_prefix + "SNRFSGSKSGNTASLTISGLQAEDEADYYCSSYAGSSTLVFGGGTKLTVL"
     )
     cls.str_light_chain_two = (
         shared_prefix + "SNRFSGSKSGNTASLTISGLQAEDEADYYCCSYAGSSTWVFGGGTKLTVL"
     )

     # Build the codon adaptation index once and share it via the class.
     index_file = os.path.join("CodonUsage", "HighlyExpressedGenes.txt")
     cai = CodonAdaptationIndex()
     cai.generate_index(index_file)
     cls.X = cai
예제 #2
0
    def test_codon_usage_custom(self):
        """Test Codon Adaptation Index (CAI) using FASTA file for background.

        Extracts the single-part CDS features from a GenBank record, writes
        them to a temporary FASTA file, generates a codon usage index from
        that file and checks the CAI of a short example gene. The temporary
        file is removed even when the test fails.
        """
        # We need a FASTA file of CDS sequences to count the codon usage...
        dna_fasta_filename = "fasta.tmp"
        dna_genbank_filename = "GenBank/NC_005816.gb"
        record = SeqIO.read(dna_genbank_filename, "genbank")
        records = []
        for feature in record.features:
            # Only CDS features whose location is a single part are used.
            if feature.type == "CDS" and len(feature.location.parts) == 1:
                start = feature.location.start.position
                end = feature.location.end.position
                table = int(feature.qualifiers["transl_table"][0])
                if feature.strand == -1:
                    seq = record.seq[start:end].reverse_complement()
                else:
                    seq = record.seq[start:end]
                # Double check we have the CDS sequence expected
                # TODO - Use any cds_start option if/when added to deal with the met
                a = "M" + str(seq[3:].translate(table))
                b = feature.qualifiers["translation"][0] + "*"
                self.assertEqual(a, b, "%r vs %r" % (a, b))
                records.append(
                    SeqRecord(
                        seq,
                        id=feature.qualifiers["protein_id"][0],
                        description=feature.qualifiers["product"][0],
                    )
                )

        try:
            with open(dna_fasta_filename, "w") as handle:
                SeqIO.write(records, handle, "fasta")

            CAI = CodonAdaptationIndex()
            # Note - this needs a FASTA file containing non-ambiguous DNA coding
            # sequences - which should each be a whole number of codons.
            CAI.generate_index(dna_fasta_filename)
            # Now check codon usage index (CAI) using this species
            self.assertEqual(
                record.annotations["source"],
                "Yersinia pestis biovar Microtus str. 91001",
            )
            value = CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG")
            self.assertAlmostEqual(value, 0.67213, places=5)
        finally:
            # Clean up on failure too, so a failed assertion or exception
            # does not leave the temporary file in the working directory.
            if os.path.isfile(dna_fasta_filename):
                os.remove(dna_fasta_filename)
예제 #3
0
    def test_codon_usage_custom(self):
        """Test Codon Adaptation Index (CAI) using FASTA file for background.

        Extracts the CDS features without sub-features from a GenBank record,
        writes them to a temporary FASTA file, generates a codon usage index
        from that file and checks the CAI of a short example gene. The
        temporary file is removed even when the test fails.
        """
        # We need a FASTA file of CDS sequences to count the codon usage...
        dna_fasta_filename = "fasta.tmp"
        dna_genbank_filename = "GenBank/NC_005816.gb"
        record = SeqIO.read(dna_genbank_filename, "genbank")
        records = []
        for feature in record.features:
            # Only CDS features with no sub-features are used.
            if feature.type == "CDS" and not feature.sub_features:
                start = feature.location.start.position
                end = feature.location.end.position
                table = int(feature.qualifiers["transl_table"][0])
                if feature.strand == -1:
                    seq = record.seq[start:end].reverse_complement()
                else:
                    seq = record.seq[start:end]
                # Double check we have the CDS sequence expected
                # TODO - Use any cds_start option if/when added to deal with the met
                a = "M" + str(seq[3:].translate(table))
                b = feature.qualifiers["translation"][0] + "*"
                self.assertEqual(a, b, "%r vs %r" % (a, b))
                records.append(SeqRecord(seq, id=feature.qualifiers["protein_id"][0],
                                         description=feature.qualifiers["product"][0]))

        try:
            with open(dna_fasta_filename, "w") as handle:
                SeqIO.write(records, handle, "fasta")

            CAI = CodonAdaptationIndex()
            # Note - this needs a FASTA file containing non-ambiguous DNA coding
            # sequences - which should each be a whole number of codons.
            CAI.generate_index(dna_fasta_filename)
            # Now check codon usage index (CAI) using this species
            self.assertEqual(record.annotations["source"],
                             "Yersinia pestis biovar Microtus str. 91001")
            self.assertEqual("%0.5f" % CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG"),
                             "0.67213")
        finally:
            # Clean up on failure too, so a failed assertion or exception
            # does not leave the temporary file in the working directory.
            if os.path.isfile(dna_fasta_filename):
                os.remove(dna_fasta_filename)
예제 #4
0
        #TODO - Use any cds_start option if/when added to deal with the met
        assert "M" + str(seq[3:].translate(table)) \
               == feature.qualifiers["translation"][0]+"*"
        records.append(SeqRecord(seq, id=feature.qualifiers["protein_id"][0],
                                 description=feature.qualifiers["product"][0]))
del start, end, table, seq
# Start from a clean slate in case an earlier run left the file behind.
if os.path.isfile(dna_fasta_filename):
    os.remove(dna_fasta_filename)
try:
    handle = open(dna_fasta_filename, "w")
    try:
        SeqIO.write(records, handle, "fasta")
    finally:
        # Close the handle even if writing the records fails.
        handle.close()

    CAI = CodonAdaptationIndex()
    # Note - this needs a FASTA file containing non-ambiguous DNA coding
    # sequences - which should each be a whole number of codons.
    CAI.generate_index(dna_fasta_filename)
    # A single parenthesised argument prints identically whether print is
    # the Python 2 statement or the print function.
    print("Example CAI %0.5f using %s"
          % (CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG"),
             record.annotations["source"]))
finally:
    # Always remove the temporary FASTA file, even if a step above failed.
    if os.path.isfile(dna_fasta_filename):
        os.remove(dna_fasta_filename)
del record, records
del dna_genbank_filename
del dna_fasta_filename

# Emit a blank separator line (print("") behaves the same on Python 2 and 3).
print("")

###################
# crc64 collision #
###################
예제 #5
0
        #TODO - Use any cds_start option if/when added to deal with the met
        assert "M" + str(seq[3:].translate(table)) \
               == feature.qualifiers["translation"][0]+"*"
        records.append(SeqRecord(seq, id=feature.qualifiers["protein_id"][0],
                                 description=feature.qualifiers["product"][0]))
del start, end, table, seq
# Start from a clean slate in case an earlier run left the file behind.
if os.path.isfile(dna_fasta_filename):
    os.remove(dna_fasta_filename)
try:
    handle = open(dna_fasta_filename, "w")
    try:
        SeqIO.write(records, handle, "fasta")
    finally:
        # Close the handle even if writing the records fails.
        handle.close()

    CAI = CodonAdaptationIndex()
    # Note - this needs a FASTA file containing non-ambiguous DNA coding
    # sequences - which should each be a whole number of codons.
    CAI.generate_index(dna_fasta_filename)
    # A single parenthesised argument prints identically whether print is
    # the Python 2 statement or the print function.
    print("Example CAI %0.5f using %s"
          % (CAI.cai_for_gene("ATGCGTATCGATCGCGATACGATTAGGCGGATG"),
             record.annotations["source"]))
finally:
    # Always remove the temporary FASTA file, even if a step above failed.
    if os.path.isfile(dna_fasta_filename):
        os.remove(dna_fasta_filename)
del record, records
del dna_genbank_filename
del dna_fasta_filename

# Emit a blank separator line (print("") behaves the same on Python 2 and 3).
print("")

###################
# crc64 collision #
###################