def test_string_kernel(): mutations = numpy.eye( 4, dtype = numpy.float64 ) c = string_kernel.MismatchKernelCalculator.create( mutations ) seqs = string_kernel.convert_to_sequence_vec( [ [ 1, 1, 2, 3 ], [ 0, 1, 2, 3 ], [ 0, 1, 2, 3 ], [ 2, 1, 2, 2 ], ] ) k = c.calculate( seqs ) # print k strings = [ 'a', 't', 'a', '' ] k, sym_to_idx, idx_to_sym = string_kernel.mismatch_kernel( strings, mismatch_weight ) print k print sym_to_idx print idx_to_sym
class SubMatWeight( object ): def __init__( self, sub_mat ): self.sub_mat = sub_mat def __call__( self, a, b ): k = a < b and (a,b) or (b,a) if not self.sub_mat.has_key( k ): raise RuntimeError( 'Substitution matrix does not have key: %s' % str( k ) ) return self.sub_mat[ k ] k, sym_to_idx, idx_to_sym = string_kernel.mismatch_kernel( [ 'ABCDEFGHIKLMNPQRSTVWXYZ', 'AAAE', 'ABCDEFGHIKLMNPQRSTVWXYZ', ], SubMatWeight( sub_mat ) ) def load_scop( scop_data_dir = 'c:/data/scop', scop_version = '1.71' ): """Return the entire scop hierachy parsed from the data files""" import corebio.resource.scop return corebio.resource.scop.Scop.parse_files( cla_file = open( '%s/dir.cla.scop.txt_%s' % (scop_data_dir, scop_version), 'r' ), des_file = open( '%s/dir.des.scop.txt_%s' % (scop_data_dir, scop_version), 'r' ), hie_file = open( '%s/dir.hie.scop.txt_%s' % (scop_data_dir, scop_version), 'r' ) )