Ejemplo n.º 1
0
 def test_order_0_states(self):
     """Check an order-0 state against its reverse-complement state.

     Adds a parameterised order-0 state with emissions [.1, .2, .3, .4]
     and a reverse-complement state derived from it, then asserts that
     column ``k`` of the first row of ``B[3:]`` is close to column
     ``3 - k`` of the second row.
     """
     builder = hmm.pssm.ModelBuilder(2)
     model = builder.create_background_mosaic_model(3, .01, 1.0)
     forward_state = builder.add_order_0_parameterised_state(
         model, emission_dist=[.1, .2, .3, .4])
     reverse_state = builder.add_order_0_rev_comp_state(model, forward_state)

     # Drop the first three rows of B; presumably they belong to the
     # background states created above -- TODO confirm.
     rows = hmm.model_states_2_model(model).B[3:]

     width = 4
     for column in range(width):
         mirrored = width - 1 - column
         assert infpy.check_is_close_2(rows[0, column], rows[1, mirrored])
Ejemplo n.º 2
0
 def test_order_0_states(self):
     """Verify the reverse-complement of an order-0 state mirrors its emissions.

     After adding a parameterised order-0 state and its reverse-complement
     state to the model, rows 0 and 1 of ``B[3:]`` are compared
     column-against-reversed-column with ``infpy.check_is_close_2``.
     """
     mb = hmm.pssm.ModelBuilder(2)
     model = mb.create_background_mosaic_model(3, .01, 1.0)
     dist = [.1, .2, .3, .4]
     positive = mb.add_order_0_parameterised_state(model, emission_dist=dist)
     negative = mb.add_order_0_rev_comp_state(model, positive)

     converted = hmm.model_states_2_model(model)
     # Only rows from index 3 onwards are inspected; presumably the two
     # states added above -- TODO confirm against ModelBuilder.
     B = converted.B[3:]

     # (column in row 0, column in row 1) pairs expected to agree.
     mirrored_columns = ((0, 3), (1, 2), (2, 1), (3, 0))
     for pos_obs, neg_obs in mirrored_columns:
         assert infpy.check_is_close_2(B[0, pos_obs], B[1, neg_obs])
Ejemplo n.º 3
0
    def test_traits(self):
        """Exercise ``GappedPssmTraits`` models for background orders 0, 1 and 2.

        For each order a model is built from 13 one-hot emission
        distributions and three properties are asserted:

        * transitions (29, 28) and (14, 15) share a parameterisation index,
          and that index differs from the one for transition (4, 5);
        * every entry ``B[n, o]`` is close to the entry at the indices
          returned by ``traits.get_non_reverse_complement(n, o)``;
        * Viterbi decoding of ``'acgtgat'`` yields, at position ``i``, a
          state ``s`` satisfying ``(s - num_background_mosaics) // 2 == i``.
        """
        from hmm.pssm import create_background_model, seq_to_numpy
        from infpy import check_is_close_2

        for order in [0, 1, 2]:
            num_background_mosaics = 4
            traits = hmm.pssm.GappedPssmTraits(
                K=7,
                p_binding_site=.1,
                background_order=order,
                num_background_mosaics=num_background_mosaics,
                background_model_creator=create_background_model,
            )
            # 13 rows for K=7; presumably 2*K - 1 (bases interleaved with
            # gap positions) -- TODO confirm against GappedPssmTraits.
            emission_dists = [
                [1., 0., 0., 0.],
                [0., 1., 0., 0.],
                [0., 1., 0., 0.],
                [1., 0., 0., 0.],
                [0., 0., 1., 0.],
                [0., 0., 0., 1.],
                [0., 0., 0., 1.],
                [0., 0., 0., 1.],
                [0., 0., 1., 0.],
                [0., 1., 0., 0.],
                [1., 0., 0., 0.],
                [0., 1., 0., 0.],
                [0., 0., 0., 1.],
            ]
            m = hmm.as_model(traits.new_model(emission_dists))

            # check we have parameterised the reverse complement gaps correctly
            rev_comp_gap_idx = m.get_transition_parameterisation(29, 28).idx
            assert rev_comp_gap_idx == m.get_transition_parameterisation(14, 15).idx
            assert rev_comp_gap_idx != m.get_transition_parameterisation(4, 5).idx

            # check the reverse complement states are correct
            B = m.B
            for n in range(m.N):
                for o in range(m.M):
                    rev_comp_state, rev_comp_obs = traits.get_non_reverse_complement(n, o)
                    assert check_is_close_2(
                        B[rev_comp_state, rev_comp_obs],
                        B[n, o]
                    ), (
                        '%d,%d %d,%d: %f %f' % (
                            rev_comp_state, rev_comp_obs, n, o,
                            B[rev_comp_state, rev_comp_obs], B[n, o]
                        )
                    )

            # check viterbi gives correct result
            # The sequence matches rows 0, 2, 4, ... of emission_dists,
            # assuming the columns are ordered a, c, g, t.
            test_seq = 'acgtgat'
            test_seq_order_0 = seq_to_numpy(test_seq)
            test_seq_order_n = m.converter.to_order_n(test_seq_order_0)
            _, states = m.viterbi(test_seq_order_n)
            for i, state in enumerate(states):
                # Floor division is spelled out so the check does not depend
                # on Python 2's integer semantics for ``/``.
                assert (state - num_background_mosaics) // 2 == i, (
                    'position %d decoded as state %d' % (i, state)
                )
Ejemplo n.º 4
0
    def test_traits(self):
        """Exercise ``PssmTraits`` models for background orders 1 and 2.

        A model is built from 13 one-hot emission distributions and two
        properties are asserted for each order:

        * every entry ``B[n, o]`` of the converted model is close to the
          entry at the indices returned by
          ``traits.get_non_reverse_complement(n, o)``;
        * Viterbi decoding of the test sequence visits the states
          ``num_background_states``, ``num_background_states + 1``, ...
          in order.
        """
        from hmm.pssm import create_background_model, PssmTraits, seq_to_numpy
        from infpy import check_is_close_2

        binding_site_prob = .01
        n_background = 2

        test_seq = 'accagtttgcact'
        # One one-hot row per base of test_seq, columns in a, c, g, t order,
        # so the sequence matches the distributions by construction.
        emission_dists = [
            [1. if symbol == base else 0. for symbol in 'acgt']
            for base in test_seq
        ]
        width = len(emission_dists)
        encoded_seq = seq_to_numpy(test_seq)

        # for various different orders
        for order in (1, 2):

            # build a model of the distributions above
            traits = PssmTraits(
                width,
                binding_site_prob,
                order,
                n_background,
                create_background_model,
                emission_dists=emission_dists,
            )
            state_model = traits.new_model()
            converted = hmm.model_states_2_model(state_model)
            emissions = converted.B

            # check the reverse complement states are correct
            for n in xrange(state_model.N):
                for o in xrange(state_model.M):
                    rev_comp_state, rev_comp_obs = traits.get_non_reverse_complement(n, o)
                    counterpart = emissions[rev_comp_state, rev_comp_obs]
                    own = emissions[n, o]
                    assert check_is_close_2(counterpart, own), (
                        '%d,%d %d,%d: %f %f' % (
                            rev_comp_state, rev_comp_obs, n, o,
                            counterpart, own))

            # check viterbi gives correct result
            order_n_seq = converted.converter.to_order_n(encoded_seq)
            _, states = converted.viterbi(order_n_seq)
            expected_state = n_background
            for state in states:
                assert state == expected_state
                expected_state += 1
Ejemplo n.º 5
0
    def test_traits(self):
        """Check ``GappedPssmTraits`` models built for background orders 0-2.

        Per order, the test builds a model from 13 one-hot emission
        distributions and asserts that:

        * the parameterisation index of transition (29, 28) equals that of
          (14, 15) and differs from that of (4, 5);
        * each ``B[n, o]`` is close to ``B`` at the indices returned by
          ``traits.get_non_reverse_complement(n, o)``;
        * the Viterbi path for ``'acgtgat'`` satisfies
          ``(state - num_background_mosaics) // 2 == i`` at every position.
        """
        from hmm.pssm import create_background_model, seq_to_numpy
        from infpy import check_is_close_2

        for order in [0, 1, 2]:
            num_background_mosaics = 4
            traits = hmm.pssm.GappedPssmTraits(
                K=7,
                p_binding_site=.1,
                background_order=order,
                num_background_mosaics=num_background_mosaics,
                background_model_creator=create_background_model)
            # 13 rows for K=7; presumably bases interleaved with gap
            # positions (2*K - 1) -- TODO confirm against GappedPssmTraits.
            emission_dists = [
                [1., 0., 0., 0.],
                [0., 1., 0., 0.],
                [0., 1., 0., 0.],
                [1., 0., 0., 0.],
                [0., 0., 1., 0.],
                [0., 0., 0., 1.],
                [0., 0., 0., 1.],
                [0., 0., 0., 1.],
                [0., 0., 1., 0.],
                [0., 1., 0., 0.],
                [1., 0., 0., 0.],
                [0., 1., 0., 0.],
                [0., 0., 0., 1.],
            ]
            m = hmm.as_model(traits.new_model(emission_dists))

            # check we have parameterised the reverse complement gaps correctly
            assert m.get_transition_parameterisation(
                29, 28).idx == m.get_transition_parameterisation(14, 15).idx
            assert m.get_transition_parameterisation(
                29, 28).idx != m.get_transition_parameterisation(4, 5).idx

            # check the reverse complement states are correct
            B = m.B
            for n in range(m.N):
                for o in range(m.M):
                    rev_comp_state, rev_comp_obs = traits.get_non_reverse_complement(
                        n, o)
                    assert check_is_close_2(
                        B[rev_comp_state, rev_comp_obs],
                        B[n, o]), ('%d,%d %d,%d: %f %f' %
                                   (rev_comp_state, rev_comp_obs, n, o,
                                    B[rev_comp_state, rev_comp_obs], B[n, o]))

            # check viterbi gives correct result
            # The sequence matches rows 0, 2, 4, ... of emission_dists,
            # assuming the columns are ordered a, c, g, t.
            test_seq = 'acgtgat'
            test_seq_order_0 = seq_to_numpy(test_seq)
            test_seq_order_n = m.converter.to_order_n(test_seq_order_0)
            _, states = m.viterbi(test_seq_order_n)
            for i, state in enumerate(states):
                # Explicit floor division: plain ``/`` only truncates for
                # ints under Python 2 without ``__future__.division``.
                assert (state - num_background_mosaics) // 2 == i, (
                    'position %d decoded as state %d' % (i, state))
Ejemplo n.º 6
0
    def test_traits(self):
        """Check ``PssmTraits`` models built for background orders 1 and 2.

        The model is built from 13 one-hot emission distributions. For each
        order the test asserts that ``B[n, o]`` of the converted model is
        close to ``B`` at the indices given by
        ``traits.get_non_reverse_complement(n, o)``, and that the Viterbi
        path for the test sequence is ``num_background_states + i`` at
        position ``i``.
        """
        from hmm.pssm import create_background_model, PssmTraits, seq_to_numpy
        from infpy import check_is_close_2

        p_site = .01
        background_count = 2
        emission_dists = [
            [1., 0., 0., 0.],
            [0., 1., 0., 0.],
            [0., 1., 0., 0.],
            [1., 0., 0., 0.],
            [0., 0., 1., 0.],
            [0., 0., 0., 1.],
            [0., 0., 0., 1.],
            [0., 0., 0., 1.],
            [0., 0., 1., 0.],
            [0., 1., 0., 0.],
            [1., 0., 0., 0.],
            [0., 1., 0., 0.],
            [0., 0., 0., 1.],
        ]
        site_length = len(emission_dists)
        # One base per row above, assuming columns are ordered a, c, g, t.
        test_seq = 'accagtttgcact'
        order_0_seq = seq_to_numpy(test_seq)

        # for various different orders
        for order in [1, 2]:

            # build a model of distribution above
            traits = PssmTraits(
                site_length,
                p_site,
                order,
                background_count,
                create_background_model,
                emission_dists=emission_dists)
            model = traits.new_model()
            converted = hmm.model_states_2_model(model)
            B = converted.B

            # check the reverse complement states are correct
            for n in xrange(model.N):
                for o in xrange(model.M):
                    rev_comp_state, rev_comp_obs = traits.get_non_reverse_complement(n, o)
                    lhs = B[rev_comp_state, rev_comp_obs]
                    rhs = B[n, o]
                    assert check_is_close_2(lhs, rhs), (
                        '%d,%d %d,%d: %f %f'
                        % (rev_comp_state, rev_comp_obs, n, o, lhs, rhs)
                    )

            # check viterbi gives correct result
            order_n_seq = converted.converter.to_order_n(order_0_seq)
            viterbi_result = converted.viterbi(order_n_seq)
            _, states = viterbi_result
            for position, state in enumerate(states):
                assert state == position + background_count
Ejemplo n.º 7
0
      [ 0., 0., 1., 0. ],
      [ 0., 1., 0., 0. ],
      [ 1., 0., 0., 0. ],
      [ 0., 1., 0., 0. ],
      [ 0., 0., 0., 1. ],
    ]
    K = len(emission_dists)
    test_seq = 'accagtttgcact' # matches dist above
    test_seq_order_0 = seq_to_numpy(test_seq)

    # for various different orders
    for order in [0, 1, 2]:

        # build a model of distribution above
        traits = PssmTraits(K, p_binding_site, order, num_background_states, create_background_model, emission_dists=emission_dists)
        model = traits.new_model()
        converted = hmm.model_states_2_model(model)
        B = converted.B

        # check the reverse complement states are correct
        for n in xrange(model.N):
            for o in xrange(model.M):
                rev_comp_state, rev_comp_obs = traits.get_non_reverse_complement(n,o)
                assert check_is_close_2(B[rev_comp_state,rev_comp_obs], B[n,o]), ('%d,%d %d,%d: %f %f' % (rev_comp_state,rev_comp_obs,n,o,B[rev_comp_state,rev_comp_obs],B[n,o]))

        # check viterbi gives correct result
        test_seq_order_n = converted.converter.to_order_n(test_seq_order_0)
        LL, states = converted.viterbi(test_seq_order_n)
        for i, state in enumerate(states):
            assert state == num_background_states+i