Example 1
    def test_pickling(self):
        model_builder = hmm.pssm.ModelBuilder(3)
        model = model_builder.create_background_mosaic_model(3, .01, 1.0)
        model_builder.dump_background_mosaic_model(model, 'model.pickle')
        model_copy = model_builder.load_background_mosaic_model('model.pickle')

        converted_model = hmm.model_states_2_model(model)
        converted_model_copy = hmm.model_states_2_model(model_copy)

        # compare the parameters element-wise; .all() on each array alone only tests truthiness
        assert (converted_model.A == converted_model_copy.A).all()
        assert (converted_model.B == converted_model_copy.B).all()
        assert (converted_model.pi == converted_model_copy.pi).all()
Example 2
    def test_pickling(self):
        model_builder = hmm.pssm.ModelBuilder(3)
        model = model_builder.create_background_mosaic_model(3, .01, 1.0)
        model_builder.dump_background_mosaic_model(model, 'model.pickle')
        model_copy = model_builder.load_background_mosaic_model('model.pickle')

        converted_model = hmm.model_states_2_model(model)
        converted_model_copy = hmm.model_states_2_model(model_copy)

        # compare the parameters element-wise; .all() on each array alone only tests truthiness
        assert (converted_model.A == converted_model_copy.A).all()
        assert (converted_model.B == converted_model_copy.B).all()
        assert (converted_model.pi == converted_model_copy.pi).all()
Example 3
 def test_order_0_states(self):
     model_builder = hmm.pssm.ModelBuilder(2)
     model = model_builder.create_background_mosaic_model(3, .01, 1.0)
     positive = model_builder.add_order_0_parameterised_state(model, emission_dist=[.1,.2,.3,.4])
     negative = model_builder.add_order_0_rev_comp_state(model, positive)
     B = hmm.model_states_2_model(model).B[3:]
     assert infpy.check_is_close_2(B[0,0], B[1,3])
     assert infpy.check_is_close_2(B[0,1], B[1,2])
     assert infpy.check_is_close_2(B[0,2], B[1,1])
     assert infpy.check_is_close_2(B[0,3], B[1,0])
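The four asserts above capture the reverse-complement relationship: assuming the usual A, C, G, T base ordering, complementing swaps A with T and C with G, so the emission distribution of the reverse-complement state is the original distribution reversed. A minimal standalone sketch of that relationship (illustration only, not part of the hmm.pssm API):

# Illustration only: with bases indexed A=0, C=1, G=2, T=3, complementing a base
# maps index i to 3 - i, so the reverse complement of an order-0 emission
# distribution is the distribution reversed.
emission_dist = [.1, .2, .3, .4]        # p(A), p(C), p(G), p(T)
rev_comp_dist = emission_dist[::-1]     # [.4, .3, .2, .1]
assert rev_comp_dist[0] == emission_dist[3]   # p(A) under rev-comp == p(T) originally
assert rev_comp_dist[1] == emission_dist[2]   # p(C) under rev-comp == p(G) originally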
Example 4
    def test_traits(self):
        from hmm.pssm import create_background_model, PssmTraits, seq_to_numpy
        from infpy import check_is_close_2

        p_binding_site = .01
        num_background_states = 2
        emission_dists = [
            [1., 0., 0., 0.],
            [0., 1., 0., 0.],
            [0., 1., 0., 0.],
            [1., 0., 0., 0.],
            [0., 0., 1., 0.],
            [0., 0., 0., 1.],
            [0., 0., 0., 1.],
            [0., 0., 0., 1.],
            [0., 0., 1., 0.],
            [0., 1., 0., 0.],
            [1., 0., 0., 0.],
            [0., 1., 0., 0.],
            [0., 0., 0., 1.],
        ]
        K = len(emission_dists)
        test_seq = 'accagtttgcact'  # matches dist above
        test_seq_order_0 = seq_to_numpy(test_seq)

        # for various different orders
        for order in [1, 2]:

            # build a model of distribution above
            traits = PssmTraits(K,
                                p_binding_site,
                                order,
                                num_background_states,
                                create_background_model,
                                emission_dists=emission_dists)
            model = traits.new_model()
            converted = hmm.model_states_2_model(model)
            B = converted.B

            # check the reverse complement states are correct
            for n in xrange(model.N):
                for o in xrange(model.M):
                    rev_comp_state, rev_comp_obs = traits.get_non_reverse_complement(
                        n, o)
                    assert check_is_close_2(
                        B[rev_comp_state, rev_comp_obs],
                        B[n, o]), ('%d,%d %d,%d: %f %f' %
                                   (rev_comp_state, rev_comp_obs, n, o,
                                    B[rev_comp_state, rev_comp_obs], B[n, o]))

            # check viterbi gives correct result
            test_seq_order_n = converted.converter.to_order_n(test_seq_order_0)
            LL, states = converted.viterbi(test_seq_order_n)
            for i, state in enumerate(states):
                assert state == num_background_states + i
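Before Viterbi is run, converted.converter.to_order_n turns the order-0 base indices into order-n observations. As a rough sketch of what such a conversion involves (a hypothetical helper, not the project's converter, whose convention may differ), each window of order + 1 bases can be encoded as a single base-4 number, giving 4 ** (order + 1) possible observations:

def to_order_n_naive(order_0_bases, order):
    # Hypothetical helper: encode each window of (order + 1) base indices (0..3)
    # as one base-4 number; the real converter may use a different convention.
    observations = []
    for i in range(order, len(order_0_bases)):
        obs = 0
        for base in order_0_bases[i - order:i + 1]:
            obs = obs * 4 + base
        observations.append(obs)
    return observations

# order 1 over 'ac' (A=0, C=1): the single window becomes 0 * 4 + 1 == 1
assert to_order_n_naive([0, 1], 1) == [1]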
Example 5
 def test_order_0_states(self):
     model_builder = hmm.pssm.ModelBuilder(2)
     model = model_builder.create_background_mosaic_model(3, .01, 1.0)
     positive = model_builder.add_order_0_parameterised_state(
         model, emission_dist=[.1, .2, .3, .4])
     negative = model_builder.add_order_0_rev_comp_state(model, positive)
     B = hmm.model_states_2_model(model).B[3:]
     assert infpy.check_is_close_2(B[0, 0], B[1, 3])
     assert infpy.check_is_close_2(B[0, 1], B[1, 2])
     assert infpy.check_is_close_2(B[0, 2], B[1, 1])
     assert infpy.check_is_close_2(B[0, 3], B[1, 0])
Example 6
    def test_traits(self):
        from hmm.pssm import create_background_model, PssmTraits, seq_to_numpy
        from infpy import check_is_close_2

        p_binding_site = .01
        num_background_states = 2
        emission_dists = [
          [ 1., 0., 0., 0. ],
          [ 0., 1., 0., 0. ],
          [ 0., 1., 0., 0. ],
          [ 1., 0., 0., 0. ],
          [ 0., 0., 1., 0. ],
          [ 0., 0., 0., 1. ],
          [ 0., 0., 0., 1. ],
          [ 0., 0., 0., 1. ],
          [ 0., 0., 1., 0. ],
          [ 0., 1., 0., 0. ],
          [ 1., 0., 0., 0. ],
          [ 0., 1., 0., 0. ],
          [ 0., 0., 0., 1. ],
        ]
        K = len(emission_dists)
        test_seq = 'accagtttgcact' # matches dist above
        test_seq_order_0 = seq_to_numpy(test_seq)

        # for various different orders
        for order in [1, 2]:

            # build a model of distribution above
            traits = PssmTraits(K, p_binding_site, order, num_background_states, create_background_model, emission_dists=emission_dists)
            model = traits.new_model()
            converted = hmm.model_states_2_model(model)
            B = converted.B

            # check the reverse complement states are correct
            for n in xrange(model.N):
                for o in xrange(model.M):
                    rev_comp_state, rev_comp_obs = traits.get_non_reverse_complement(n,o)
                    assert check_is_close_2(B[rev_comp_state,rev_comp_obs], B[n,o]), ('%d,%d %d,%d: %f %f' % (rev_comp_state,rev_comp_obs,n,o,B[rev_comp_state,rev_comp_obs],B[n,o]))

            # check viterbi gives correct result
            test_seq_order_n = converted.converter.to_order_n(test_seq_order_0)
            LL, states = converted.viterbi(test_seq_order_n)
            for i, state in enumerate(states):
                assert state == num_background_states+i
Example 7
    def learn_model(self):
        """
        Creates several models, trains them using Baum-Welch and returns the best
        """
        self.logger.info('Learning initial model')

        # work out the parameters for baum-welch and run it
        self.known_bases = sum(self.converter.num_known_bases_order_n(seq) for seq in self.order_n_seqs)
        self.default_tolerance = 1e-6 * self.known_bases

        # create models
        models = [
                hmm.model_states_2_model(self.pssm_traits.new_model())
                for i in xrange(self.num_models)
        ]

        LLs = [ (model,self.train_model(model)[0]) for model in models ]

        # which had the best log likelihood?
        best = max(LLs, key=lambda x:x[1])
        # print best, LLs
        return best
Example 8
    def learn_model(self):
        """
        Creates several models, trains them using Baum-Welch and returns the best
        """
        self.logger.info('Learning initial model')

        # work out the parameters for baum-welch and run it
        self.known_bases = sum(
            self.converter.num_known_bases_order_n(seq)
            for seq in self.order_n_seqs)
        self.default_tolerance = 1e-6 * self.known_bases

        # create models
        models = [
            hmm.model_states_2_model(self.pssm_traits.new_model())
            for i in xrange(self.num_models)
        ]

        LLs = [(model, self.train_model(model)[0]) for model in models]

        # which had the best log likelihood?
        best = max(LLs, key=lambda x: x[1])
        # print best, LLs
        return best
Example 9
File: io.py Project: JohnReid/HMM
TRANSITION:4;5;1
TRANSITION:5;6;1
TRANSITION:6;7;1
TRANSITION:7;8;1

# EMISSIONS:i;b1,b2,b3,b4 - where p(state i emits base j) = bj
EMISSIONS:0;.5,.5,0,0
EMISSIONS:1;0,0,.5,.5
EMISSIONS:2;1,0,0,0
EMISSIONS:3;0,0,0,1
EMISSIONS:4;0,0,1,0
EMISSIONS:5;0,1,0,0
EMISSIONS:6;1,0,0,0
EMISSIONS:7;1,0,0,0
EMISSIONS:8;0,0,1,0
"""
    model = build_model( to_parse.split('\n') )
    m = hmm.model_states_2_model(model)
    if False:
        hmm.graph_as_svg(
                m,
                'test',
                'model_io_test',
                graphing_keywords = {
                        'show_dists' : lambda l: True,
                        'state_labels' : None
                },
                neato_properties = { '-Elen' : '2' }
        )
    with open('model.mdl', 'w') as f: write_model(model, f)
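The EMISSIONS lines follow the format documented in the comment above (EMISSIONS:i;b1,b2,b3,b4). A small parsing sketch for one such line (parse_emissions_line is hypothetical, not part of the project's io.py):

def parse_emissions_line(line):
    # 'EMISSIONS:0;.5,.5,0,0' -> (0, [0.5, 0.5, 0.0, 0.0])
    _, rest = line.split(':', 1)
    state, dist = rest.split(';')
    return int(state), [float(p) for p in dist.split(',')]

assert parse_emissions_line('EMISSIONS:2;1,0,0,0') == (2, [1.0, 0.0, 0.0, 0.0])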
Example 10
      [ 0., 0., 1., 0. ],
      [ 0., 1., 0., 0. ],
      [ 1., 0., 0., 0. ],
      [ 0., 1., 0., 0. ],
      [ 0., 0., 0., 1. ],
    ]
    K = len(emission_dists)
    test_seq = 'accagtttgcact' # matches dist above
    test_seq_order_0 = seq_to_numpy(test_seq)

    # for various different orders
    for order in [0, 1, 2]:

        # build a model of distribution above
        traits = PssmTraits(K, p_binding_site, order, num_background_states, create_background_model, emission_dists=emission_dists)
        model = traits.new_model()
        converted = hmm.model_states_2_model(model)
        B = converted.B

        # check the reverse complement states are correct
        for n in xrange(model.N):
            for o in xrange(model.M):
                rev_comp_state, rev_comp_obs = traits.get_non_reverse_complement(n,o)
                assert check_is_close_2(B[rev_comp_state,rev_comp_obs], B[n,o]), ('%d,%d %d,%d: %f %f' % (rev_comp_state,rev_comp_obs,n,o,B[rev_comp_state,rev_comp_obs],B[n,o]))

        # check viterbi gives correct result
        test_seq_order_n = converted.converter.to_order_n(test_seq_order_0)
        LL, states = converted.viterbi(test_seq_order_n)
        for i, state in enumerate(states):
            assert state == num_background_states+i
Example 11
    except:
        print 'Could not set process priority'


    def bw_callback(LL): pass; #print 'LL: %.3f' % LL
    order = 0
    num_mosaics = 1
    cache = BackgroundModelCache()

    print '  fragment      order  # mosaics LL/base'
    for fragment in all_fragments:
        seqs = seqs_for_fragment(fragment)
        model_builder = hmm.pssm.ModelBuilder(order)
        training_sequences = [ model_builder.converter.to_order_n(s) for s in seqs ]
        known_bases = sum(model_builder.converter.num_known_bases_order_n(s) for s in training_sequences)
        print '%10s %10d %10d' % (fragment, order, num_mosaics),
        try:
            model = cache.get_model(order, num_mosaics, fragment)
        except:
            model_by_states = model_builder.create_background_mosaic_model(num_mosaics, 0.01, 100.0)
            model = hmm.model_states_2_model(model_by_states)
            tolerance = 1e-4 * known_bases
            model.baum_welch(
                    training_sequences,
                    tolerance = tolerance,
                    callback = bw_callback
            )
            cache.save_model(model, order, num_mosaics, fragment)
        LL = sum(model.forward(s)[0] for s in training_sequences)
        print LL/known_bases
Example 12
TRANSITION:3;5;.9
TRANSITION:4;5;1
TRANSITION:5;6;1
TRANSITION:6;7;1
TRANSITION:7;8;1

# EMISSIONS:i;b1,b2,b3,b4 - where p(state i emits base j) = bj
EMISSIONS:0;.5,.5,0,0
EMISSIONS:1;0,0,.5,.5
EMISSIONS:2;1,0,0,0
EMISSIONS:3;0,0,0,1
EMISSIONS:4;0,0,1,0
EMISSIONS:5;0,1,0,0
EMISSIONS:6;1,0,0,0
EMISSIONS:7;1,0,0,0
EMISSIONS:8;0,0,1,0
"""
    model = build_model(to_parse.split('\n'))
    m = hmm.model_states_2_model(model)
    if False:
        hmm.graph_as_svg(m,
                         'test',
                         'model_io_test',
                         graphing_keywords={
                             'show_dists': lambda l: True,
                             'state_labels': None
                         },
                         neato_properties={'-Elen': '2'})
    with open('model.mdl', 'w') as f:
        write_model(model, f)