def test_schema_representation(self):
    """Convert sequences into schema representations.

    Loads the schema repository, takes its top 25 schemas and uses a
    ``SchemaCoder`` built from them to encode every record read from
    ``self.test_file``.  The encoded values are only printed (when
    ``VERBOSE`` is set); nothing is asserted about them, so this checks
    that encoding runs without raising.
    """
    # get a set of schemas we want to code the sequence in
    schema_bank = self._load_schema_repository()
    top_schemas = schema_bank.get_top(25)
    schema_coder = Schema.SchemaCoder(top_schemas, self.schema)

    # get the sequences one at a time, and encode them
    fasta_handle = open(self.test_file, 'r')
    try:
        seq_parser = Fasta.SequenceParser(alphabet=IUPAC.unambiguous_dna)
        iterator = Fasta.Iterator(fasta_handle, seq_parser)

        while True:
            seq_record = iterator.next()
            # a None record ends the loop
            if seq_record is None:
                break

            schema_values = schema_coder.representation(seq_record.seq)
            if VERBOSE:
                # One parenthesised argument prints the same text whether
                # print is a statement or a function; the 1-tuple keeps the
                # formatting valid even if schema_values is itself a tuple.
                print("Schema values: %s" % (schema_values,))
    finally:
        # close the handle even when parsing or encoding raises
        fasta_handle.close()
def test_schema_representation(self):
    """Convert sequences into schema representations.

    Loads the schema repository, takes its top 25 schemas and uses a
    ``SchemaCoder`` built from them to encode every FASTA record parsed
    from ``self.test_file``.  The encoded values are only printed (when
    ``VERBOSE`` is set); nothing is asserted about them, so this checks
    that encoding runs without raising.
    """
    # get a set of schemas we want to code the sequence in
    schema_bank = self._load_schema_repository()
    top_schemas = schema_bank.get_top(25)
    schema_coder = Schema.SchemaCoder(top_schemas, self.schema)

    # get the sequences one at a time, and encode them; the context
    # manager closes the file even when parsing or encoding raises
    with open(self.test_file, 'r') as fasta_handle:
        records = SeqIO.parse(fasta_handle, "fasta",
                              alphabet=IUPAC.unambiguous_dna)
        for seq_record in records:
            schema_values = schema_coder.representation(seq_record.seq)
            if VERBOSE:
                # wrap in a 1-tuple so a tuple-valued result is formatted
                # as a single value instead of being unpacked by %
                print("Schema values: %s" % (schema_values,))
def setUp(self):
    """Build the motif coder and the expected encodings used by the tests.

    Sets ``self.motif_coder`` to a ``SchemaCoder`` over five motifs, and
    ``self.match_strings`` to ``(sequence, expected values)`` pairs with
    one expected value per motif, in motif order.
    """
    # each schema letter maps to the string of letters it stands for
    letter_expansions = {"G": "G",
                         "A": "A",
                         "T": "T",
                         "C": "C",
                         "R": "AG",
                         "*": "AGTC"}
    schema = Schema.Schema(letter_expansions)

    motif_patterns = ("GA", "GATAG", "GA*AG", "GATRG", "*A")
    self.motif_coder = Schema.SchemaCoder(motif_patterns, schema)

    first_case = ("GATAG", [0.5, 0.5, 0.5, 0.5, 1.0])
    second_case = ("GAGAGATA", [0.75, 0, 0.25, 0, 1])
    self.match_strings = [first_case, second_case]