Ejemplo n.º 1
0
def test_dna():
    # Two-state HMM from Borodovsky and Ekisheva (2006), Problems and
    # Solutions in Biological Sequence Analysis, p. 80. The states are
    # H and L (high and low C+G content); every sample is a one-hot
    # vector over the four features T, C, A and G.

    # start and final are the same uniform log-probability array.
    start = final = np.log([.5, .5])

    trans = np.log([[.5, .5],
                    [.4, .6]])

    # XXX in a binary problem, w of shape (n_features,) should be enough
    w = np.log([[.2, .3, .2, .3],
                [.3, .2, .3, .2]])

    # Observed sequence GGCACTGAA: pick the matching identity row for
    # each base to get its one-hot encoding.
    feature = {"T": 0, "C": 1, "A": 2, "G": 3}
    X = np.eye(4, dtype=int)[[feature[base] for base in "GGCACTGAA"]]
    Phi = X.dot(w.T)

    # Expected labelling: HHHLLLLLL (H is 0, L is 1).
    y_true = np.array([0] * 3 + [1] * 6)

    decoded = viterbi(Phi, trans, start, final)
    assert_array_equal(decoded, y_true)

    # For this problem, Viterbi actually is better than best-first.
    bf_accuracy = accuracy_score(y_true, bestfirst(Phi, trans, start, final))
    assert_greater(bf_accuracy, .75)
def test_dna():
    # Two-state HMM from Borodovsky and Ekisheva (2006), Problems and
    # Solutions in Biological Sequence Analysis, p. 80. The states are
    # H and L (high and low C+G content); every sample is a one-hot
    # vector over the four features T, C, A and G.

    # start and final are the same uniform log-probability array.
    start = final = np.log([.5, .5])

    trans = np.log([[.5, .5],
                    [.4, .6]])

    # XXX in a binary problem, w of shape (n_features,) should be enough
    w = np.log([[.2, .3, .2, .3],
                [.3, .2, .3, .2]])

    # Observed sequence GGCACTGAA; a base's position in "TCAG" is the
    # column that is set in its one-hot row.
    columns = ["TCAG".index(base) for base in "GGCACTGAA"]
    X = np.eye(4, dtype=int)[columns]
    score = X.dot(w.T)

    # Expected labelling: HHHLLLLLL (H is 0, L is 1).
    y_true = np.array([0] * 3 + [1] * 6)

    decoded = viterbi(score, None, trans, start, final)
    assert_array_equal(decoded, y_true)

    # For this problem, Viterbi actually is better than best-first.
    bf = bestfirst(score, None, trans, start, final)
    bf_accuracy = accuracy_score(y_true, bf)
    assert_greater(bf_accuracy, .75)
Ejemplo n.º 3
0
def test_wikipedia_example():
    # HMM example taken from Wikipedia. The two states are "Healthy" and
    # "Fever"; a sample is one of "normal", "cold" or "dizzy", encoded as
    # a one-hot feature vector. For the observations
    # ['normal', 'cold', 'dizzy'] the optimal state sequence is
    # ['Healthy', 'Healthy', 'Fever'].

    start = np.log([.6, .4])
    final = np.log([.5, .5])    # not given, so assume uniform probabilities

    trans = np.log([[.7, .3],
                    [.4, .6]])

    w = np.log([[.5, .4, .1],
                [.1, .3, .6]])

    # Each of the three observations is a distinct feature, in feature
    # order, so the one-hot sample matrix is the 3x3 identity.
    X = np.eye(3, dtype=int)
    Phi = X.dot(w.T)

    expected = [0, 0, 1]
    assert_array_equal(bestfirst(Phi, trans, start, final), expected)
    assert_array_equal(viterbi(Phi, trans, start, final), expected)
def test_wikipedia_example():
    # HMM example taken from Wikipedia. The two states are "Healthy" and
    # "Fever"; a sample is one of "normal", "cold" or "dizzy", encoded as
    # a one-hot feature vector. For the observations
    # ['normal', 'cold', 'dizzy'] the optimal state sequence is
    # ['Healthy', 'Healthy', 'Fever'].

    start = np.log([.6, .4])
    final = np.log([.5, .5])    # not given, so assume uniform probabilities

    trans = np.log([[.7, .3],
                    [.4, .6]])

    w = np.log([[.5, .4, .1],
                [.1, .3, .6]])

    # Each of the three observations is a distinct feature, in feature
    # order, so the one-hot sample matrix is the 3x3 identity.
    X = np.eye(3, dtype=int)
    score = X.dot(w.T)

    expected = [0, 0, 1]
    for decode in (bestfirst, viterbi):
        assert_array_equal(decode(score, None, trans, start, final), expected)