예제 #1
0
    def setUp(self):
        """Load the FASTA fixtures and build the schema finder under test.

        Records parsed from ``enolase.fasta`` are collected in
        ``self.test_records`` and those from ``repeat.fasta`` in
        ``self.diff_records``.  ``self.finder`` is a ``Schema.SchemaFinder``
        asked for ``self.num_schemas`` schemas, driven by a
        ``Schema.GeneticAlgorithmFinder`` whose ``min_generations`` is 1.
        """
        test_file = os.path.join('NeuralNetwork', 'enolase.fasta')
        diff_file = os.path.join('NeuralNetwork', 'repeat.fasta')

        self.test_records = []
        self.diff_records = []

        # load the records
        for fasta_path, records in ((test_file, self.test_records),
                                    (diff_file, self.diff_records)):
            # the context manager closes the handle even when parsing raises,
            # so a broken fixture cannot leak an open file into later tests
            with open(fasta_path, 'r') as handle:
                seq_parser = Fasta.SequenceParser(
                    alphabet=IUPAC.unambiguous_dna)
                iterator = Fasta.Iterator(handle, seq_parser)

                # keep reading until the iterator hands back None
                while True:
                    seq_record = iterator.next()
                    if seq_record is None:
                        break
                    records.append(seq_record)

        self.num_schemas = 2
        schema_ga = Schema.GeneticAlgorithmFinder()
        schema_ga.min_generations = 1
        self.finder = Schema.SchemaFinder(num_schemas=self.num_schemas,
                                          schema_finder=schema_ga)
예제 #2
0
    def setUp(self):
        """Create the schema factory, the fixture path and the test schema.

        ``self.test_file`` is the enolase FASTA fixture resolved against the
        current working directory; ``self.schema`` is a ``Schema.Schema``
        built from the ambiguity table assembled below.
        """
        self.factory = Schema.SchemaFactory()

        fixture_parts = ("NeuralNetwork", "enolase.fasta")
        self.test_file = os.path.join(os.getcwd(), *fixture_parts)

        # the four plain bases map to themselves; "R" and "*" expand to the
        # sets of bases listed for them
        symbol_table = dict((base, base) for base in "GATC")
        symbol_table["R"] = "AG"
        symbol_table["*"] = "AGTC"

        self.schema = Schema.Schema(symbol_table)
예제 #3
0
    def runTest(self):
        """Check Schema.matches_schema on matching and non-matching pairs.

        ``self.assertEqual`` is used rather than bare ``assert`` statements
        so the checks still run under ``python -O`` and a failure reports
        both the actual and the expected value.
        """
        match = Schema.matches_schema("GATC", "AAAAA")
        self.assertEqual(match, 0,
                         "Expected no match because of length differences")

        match = Schema.matches_schema("GATC", "GAT*")
        self.assertEqual(match, 1, "Expected match")

        match = Schema.matches_schema("GATC", "GATC")
        self.assertEqual(match, 1, "Expected match")

        match = Schema.matches_schema("GATC", "C*TC")
        self.assertEqual(match, 0,
                         "Expected no match because of char mismatch.")

        match = Schema.matches_schema("G*TC", "*TTC")
        self.assertEqual(match, 1, "Expected match because of ambiguity.")
예제 #4
0
    def runTest(self):
        """Run Schema.matches_schema over a fixed table of string pairs.

        Each row holds the two strings passed to ``matches_schema``, the
        result expected back (1 for a match, 0 otherwise) and the message
        shown on failure.  The comparison goes through ``self.assertEqual``
        instead of a bare ``assert`` so it is not stripped by ``python -O``
        and so a failure shows the value that was actually returned.
        """
        cases = (
            ("GATC", "AAAAA", 0,
             "Expected no match because of length differences"),
            ("GATC", "GAT*", 1, "Expected match"),
            ("GATC", "GATC", 1, "Expected match"),
            ("GATC", "C*TC", 0,
             "Expected no match because of char mismatch."),
            ("G*TC", "*TTC", 1, "Expected match because of ambiguity."),
        )

        # rows are checked in order; the first mismatch fails the test
        for first, second, expected, message in cases:
            match = Schema.matches_schema(first, second)
            self.assertEqual(match, expected, message)
예제 #5
0
    def test_schema_representation(self):
        """Convert sequences into schema representations.

        Takes the top 25 schemas from the repository returned by
        ``self._load_schema_repository()``, builds a ``Schema.SchemaCoder``
        from them and ``self.schema``, and encodes every record read from
        ``self.test_file``.  The encoded values are printed only when the
        module-level ``VERBOSE`` flag is set; nothing is asserted about them.
        """
        # get a set of schemas we want to code the sequence in
        schema_bank = self._load_schema_repository()
        top_schemas = schema_bank.get_top(25)
        schema_coder = Schema.SchemaCoder(top_schemas, self.schema)

        # get the sequences one at a time, and encode them; the context
        # manager closes the file even if parsing or encoding raises
        with open(self.test_file, 'r') as fasta_handle:
            seq_parser = Fasta.SequenceParser(alphabet=IUPAC.unambiguous_dna)
            iterator = Fasta.Iterator(fasta_handle, seq_parser)

            # keep reading until the iterator hands back None
            while True:
                seq_record = iterator.next()
                if seq_record is None:
                    break

                schema_values = schema_coder.representation(seq_record.seq)
                if VERBOSE:
                    # a single pre-formatted argument prints the same text
                    # under the Python 2 print statement and the Python 3
                    # print function
                    print("Schema values: %s" % (schema_values,))
예제 #6
0
    def runTest(self):
        """Compare Schema.matches_schema against a table of expected results.

        Every row gives the two strings handed to ``matches_schema``, the
        value it should return and the failure message for that row.
        """
        expectations = [
            (("GATC", "AAAAA"), 0,
             "Expected no match because of length differences"),
            (("GATC", "GAT*"), 1, "Expected match"),
            (("GATC", "GATC"), 1, "Expected match"),
            (("GATC", "C*TC"), 0,
             "Expected no match because of char mismatch."),
            (("G*TC", "*TTC"), 1, "Expected match because of ambiguity."),
        ]

        # rows run in the listed order and stop at the first failure
        for arguments, wanted, failure_text in expectations:
            outcome = Schema.matches_schema(*arguments)
            self.assertEqual(outcome, wanted, failure_text)
예제 #7
0
    def setUp(self):
        """Build the motif coder and the fixtures it is checked against.

        ``self.motif_coder`` is a ``Schema.SchemaCoder`` over five motifs
        and a ``Schema.Schema`` built from the ambiguity table below.
        ``self.match_strings`` pairs each input string with a list of five
        numbers, one per motif -- presumably the values the coder is
        expected to produce for that string (confirm against the test
        body).
        """
        motifs = ("GA", "GATAG", "GA*AG", "GATRG", "*A")

        motif_representation = Schema.Schema({
            "G": "G",
            "A": "A",
            "T": "T",
            "C": "C",
            "R": "AG",
            "*": "AGTC",
        })
        self.motif_coder = Schema.SchemaCoder(motifs, motif_representation)

        short_case = ("GATAG", [.5, .5, .5, .5, 1.0])
        long_case = ("GAGAGATA", [3.0 / 4.0, 0, 1.0 / 4.0, 0, 1])
        self.match_strings = [short_case, long_case]
예제 #8
0
    def runTest(self):
        """Verify Schema.matches_schema for five fixed pairs of strings."""

        def check(left, right, expected, message):
            # one comparison: call matches_schema and assert on its result
            self.assertEqual(Schema.matches_schema(left, right),
                             expected, message)

        check("GATC", "AAAAA", 0,
              "Expected no match because of length differences")
        check("GATC", "GAT*", 1, "Expected match")
        check("GATC", "GATC", 1, "Expected match")
        check("GATC", "C*TC", 0,
              "Expected no match because of char mismatch.")
        check("G*TC", "*TTC", 1, "Expected match because of ambiguity.")
예제 #9
0
    def setUp(self):
        """Load the FASTA fixtures and build the schema finder under test.

        Records parsed from ``enolase.fasta`` are collected in
        ``self.test_records`` and those from ``repeat.fasta`` in
        ``self.diff_records``.  ``self.finder`` is a ``Schema.SchemaFinder``
        asked for ``self.num_schemas`` schemas, driven by a
        ``Schema.GeneticAlgorithmFinder`` whose ``min_generations`` is 1.
        """
        test_file = os.path.join('NeuralNetwork', 'enolase.fasta')
        diff_file = os.path.join('NeuralNetwork', 'repeat.fasta')

        self.test_records = []
        self.diff_records = []

        # load the records
        for fasta_path, records in ((test_file, self.test_records),
                                    (diff_file, self.diff_records)):
            # the parser is consumed inside the ``with`` block, and the
            # handle is closed even if parsing raises part-way through
            with open(fasta_path, 'r') as handle:
                records.extend(
                    SeqIO.parse(handle, "fasta",
                                alphabet=IUPAC.unambiguous_dna))

        self.num_schemas = 2
        schema_ga = Schema.GeneticAlgorithmFinder()
        schema_ga.min_generations = 1
        self.finder = Schema.SchemaFinder(num_schemas=self.num_schemas,
                                          schema_finder=schema_ga)
예제 #10
0
    def test_schema_representation(self):
        """Convert sequences into schema representations.

        Takes the top 25 schemas from the repository returned by
        ``self._load_schema_repository()``, builds a ``Schema.SchemaCoder``
        from them and ``self.schema``, and encodes every record read from
        ``self.test_file``.  The encoded values are printed only when the
        module-level ``VERBOSE`` flag is set; nothing is asserted about them.
        """
        # get a set of schemas we want to code the sequence in
        schema_bank = self._load_schema_repository()
        top_schemas = schema_bank.get_top(25)
        schema_coder = Schema.SchemaCoder(top_schemas, self.schema)

        # get the sequences one at a time, and encode them; the context
        # manager closes the file even if parsing or encoding raises
        with open(self.test_file, 'r') as fasta_handle:
            for seq_record in SeqIO.parse(fasta_handle, "fasta",
                                          alphabet=IUPAC.unambiguous_dna):
                schema_values = schema_coder.representation(seq_record.seq)
                if VERBOSE:
                    # wrapped in a one-element tuple so %-formatting also
                    # works should representation() ever return a tuple
                    print("Schema values: %s" % (schema_values,))
예제 #11
0
    def setUp(self):
        """Prepare the schema object, the input string and expected pairs.

        ``self.match_info`` pairs each motif with a list of strings --
        presumably the substrings of ``self.match_string`` that the motif
        should match (confirm against the test body).
        """
        self.match_string = "GATAG"
        self.match_info = [
            ("GA", ["GA"]),
            ("GATAG", ["GATAG"]),
            ("GA*AG", ["GATAG"]),
            ("GATRG", ["GATAG"]),
            ("*A", ["GA", "TA"]),
        ]

        # the four plain bases map to themselves; "R" and "*" expand to the
        # sets of bases listed for them
        symbol_table = dict((base, base) for base in "GATC")
        symbol_table["R"] = "AG"
        symbol_table["*"] = "AGTC"
        self.motif_coder = Schema.Schema(symbol_table)