Example #1
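These snippets are lifted from a larger test module and none of them show their imports. A header along the following lines would make them self-contained (a sketch; it assumes the names come from dlib's Python bindings and that pytest is available for approx):

import pickle
from random import Random

import dlib
from dlib import (array, make_sparse_vector, pair, sparse_vector,
                  sparse_vectors, sparse_vectorss, vector, vectors)
from pytest import approx
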
def test_sparse_vectorss():
    svss = sparse_vectorss()
    assert len(svss) == 0

    svss.resize(5)
    for svs in svss:
        assert len(svs) == 0

    svss.clear()
    assert len(svss) == 0

    svss.extend([
        sparse_vectors([
            sparse_vector([pair(1, 2), pair(3, 4)]),
            sparse_vector([pair(5, 6), pair(7, 8)])
        ])
    ])

    assert len(svss) == 1
    assert svss[0][0][0].first == 1
    assert svss[0][0][0].second == 2
    assert svss[0][0][1].first == 3
    assert svss[0][0][1].second == 4
    assert svss[0][1][0].first == 5
    assert svss[0][1][0].second == 6
    assert svss[0][1][1].first == 7
    assert svss[0][1][1].second == 8

    deser = pickle.loads(pickle.dumps(svss, 2))
    assert deser == svss
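For orientation, the containers in this test nest as follows: a pair holds a single (index, value) entry, a sparse_vector is a list of pairs, sparse_vectors is a list of sparse vectors, and sparse_vectorss is a list of those. A minimal sketch of the same nesting, assuming the imports above:

svs_list = sparse_vectorss()
svs_list.extend([sparse_vectors([sparse_vector([pair(0, 1.0)])])])
assert svs_list[0][0][0].first == 0
assert svs_list[0][0][0].second == 1.0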
Example #2
def test_sparse_vector():
    sv = sparse_vector()
    sv.append(pair(3, .1))
    sv.append(pair(3, .2))
    sv.append(pair(2, .3))
    sv.append(pair(1, .4))

    assert len(sv) == 4
    make_sparse_vector(sv)

    assert len(sv) == 3
    assert sv[0].first == 1
    assert sv[0].second == .4
    assert sv[1].first == 2
    assert sv[1].second == .3
    assert sv[2].first == 3
    assert sv[2].second == approx(.3)

    assert str(sv) == "1: 0.4\n2: 0.3\n3: 0.3"
    assert repr(sv) == "< dlib.sparse_vector containing: \n1: 0.4\n2: 0.3\n3: 0.3 >"
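The reason len(sv) drops from 4 to 3 is that make_sparse_vector() sorts the pairs by index and merges entries sharing an index by summing their values: the two pairs for index 3 collapse into one whose value is 0.1 + 0.2, which is not exactly 0.3 in floating point, hence the approx() comparison (the str/repr output still reads "0.3" because printing rounds it). A small sketch of the same behavior, assuming the imports above:

sv = sparse_vector([pair(2, 1.0), pair(0, 0.25), pair(2, 0.5)])
make_sparse_vector(sv)   # sorts by index and sums the two index-2 entries in place
assert len(sv) == 2
assert sv[0].first == 0 and sv[0].second == 0.25
assert sv[1].first == 2 and sv[1].second == 1.5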
Example #3
def sentence_to_sparse_vectors(sentence):
    vects   = dlib.sparse_vectors()
    has_cap = dlib.sparse_vector()
    no_cap  = dlib.sparse_vector()
    # make has_cap equivalent to dlib.vector([1])
    has_cap.append(dlib.pair(0,1))
    # Since we didn't add anything to no_cap it is equivalent to dlib.vector([0])

    for word in sentence.split():
        if (word[0].isupper()):
            vects.append(has_cap)
        else:
            vects.append(no_cap)
    return vects
Example #4
def sentence_to_sparse_vectors(sentence):
    vects = dlib.sparse_vectors()
    has_cap = dlib.sparse_vector()
    no_cap = dlib.sparse_vector()
    # make has_cap equivalent to dlib.vector([1])
    has_cap.append(dlib.pair(0, 1))
    # Since we didn't add anything to no_cap it is equivalent to dlib.vector([0])

    for word in sentence.split():
        if (word[0].isupper()):
            vects.append(has_cap)
        else:
            vects.append(no_cap)
    return vects
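The sentence_to_sparse_vectors helper in the last two examples encodes a single feature per word: index 0 is set to 1 when the word starts with a capital letter, and the vector is left empty otherwise (an empty sparse vector is the sparse counterpart of dlib.vector([0])). A small usage sketch with a made-up sentence, assuming dlib is importable:

vects = sentence_to_sparse_vectors("The quick brown Fox")
assert len(vects) == 4
assert len(vects[0]) == 1                   # "The" starts with a capital letter
assert vects[0][0].first == 0 and vects[0][0].second == 1
assert len(vects[1]) == 0                   # "quick" does not, so its vector stays empty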
Example #5
def training_data():
    r = Random(0)
    predictors = vectors()
    sparse_predictors = sparse_vectors()
    response = array()
    for i in range(30):
        for c in [-1, 1]:
            response.append(c)
            values = [r.random() + c * 0.5 for _ in range(3)]
            predictors.append(vector(values))
            sp = sparse_vector()
            for i, v in enumerate(values):
                sp.append(pair(i, v))
            sparse_predictors.append(sp)
    return predictors, sparse_predictors, response
Example #6
def training_data():
    r = Random(0)
    predictors = vectors()
    sparse_predictors = sparse_vectors()
    response = array()
    for i in range(30):
        for c in [-1, 1]:
            response.append(c)
            values = [r.random() + c * 0.5 for _ in range(3)]
            predictors.append(vector(values))
            sp = sparse_vector()
            for i, v in enumerate(values):
                sp.append(pair(i, v))
            sparse_predictors.append(sp)
    return predictors, sparse_predictors, response
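The training_data helper in the two examples above builds a small synthetic two-class set: thirty passes, each adding one sample per label in {-1, +1}, where a sample is a 3-D point drawn uniformly in [0, 1) and shifted by label * 0.5 in every coordinate; the same points are returned densely (vectors) and sparsely (sparse_vectors), with the labels in an array. Note the inner enumerate reuses the name i, harmlessly shadowing the outer counter. A quick sanity check, assuming the imports above:

predictors, sparse_predictors, response = training_data()
assert len(predictors) == len(sparse_predictors) == len(response) == 60   # 30 passes x 2 labels
assert len(predictors[0]) == 3           # dense samples are 3-D
assert len(sparse_predictors[0]) == 3    # sparse samples carry one pair per dimension
assert response[0] == -1 and response[1] == 1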
Example #7
def test_pair():
    p = pair(4, .9)
    assert p.first == 4
    assert p.second == .9

    p.first = 3
    p.second = .4

    assert p.first == 3
    assert p.second == .4

    assert str(p) == "3: 0.4"
    assert repr(p) == "dlib.pair(3, 0.4)"

    deser = pickle.loads(pickle.dumps(p, 2))
    assert deser.first == p.first
    assert deser.second == p.second
Example #8
# Finally, note that the ranking tools also support the use of sparse vectors in
# addition to dense vectors (which we used above).  So if we wanted to do
# exactly what we did in the first part of the example program above but using
# sparse vectors we would do it like so:

data = dlib.sparse_ranking_pair()
samp = dlib.sparse_vector()

# Make samp represent the same vector as dlib.vector([1, 0]).  In dlib, a sparse
# vector is just an array of pair objects.  Each pair stores an index and a
# value.  Moreover, the svm-ranking tools require sparse vectors to be sorted
# and to have unique indices.  This means that the indices are listed in
# increasing order and no index value shows up more than once.  If necessary,
# you can use the dlib.make_sparse_vector() routine to make a sparse vector
# object properly sorted and contain unique indices.
samp.append(dlib.pair(0, 1))
data.relevant.append(samp)

# Now make samp represent the same vector as dlib.vector([0, 1])
samp.clear()
samp.append(dlib.pair(1, 1))
data.nonrelevant.append(samp)

trainer = dlib.svm_rank_trainer_sparse()
rank = trainer.train(data)
print "ranking score for a relevant vector:     ", rank(data.relevant[0])
print "ranking score for a non-relevant vector: ", rank(data.nonrelevant[0])
# Just as before, the output is the following:
#    ranking score for a relevant vector:     0.5
#    ranking score for a non-relevant vector: -0.5
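The comments above stress that the svm-ranking tools expect each sparse vector to be sorted by index with no duplicate indices. If a vector is assembled in arbitrary order, the dlib.make_sparse_vector() routine mentioned there puts it into that form, sorting the pairs and summing duplicates; a small sketch with made-up values:

messy = dlib.sparse_vector()
messy.append(dlib.pair(5, 1))    # indices appended out of order...
messy.append(dlib.pair(2, 1))
messy.append(dlib.pair(2, 1))    # ...including a duplicate
dlib.make_sparse_vector(messy)   # sorted by index, duplicate entries summed
assert len(messy) == 2
assert messy[0].first == 2 and messy[0].second == 2
assert messy[1].first == 5 and messy[1].second == 1
# messy is now in the form the ranking tools require.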
Example #9
# Finally, note that the ranking tools also support the use of sparse vectors in
# addition to dense vectors (which we used above).  So if we wanted to do
# exactly what we did in the first part of the example program above but using
# sparse vectors we would do it like so:

data = dlib.sparse_ranking_pair()
samp = dlib.sparse_vector()

# Make samp represent the same vector as dlib.vector([1, 0]).  In dlib, a sparse
# vector is just an array of pair objects.  Each pair stores an index and a
# value.  Moreover, the svm-ranking tools require sparse vectors to be sorted
# and to have unique indices.  This means that the indices are listed in
# increasing order and no index value shows up more than once.  If necessary,
# you can use the dlib.make_sparse_vector() routine to make a sparse vector
# object properly sorted and contain unique indices. 
samp.append(dlib.pair(0, 1))
data.relevant.append(samp)

# Now make samp represent the same vector as dlib.vector([0, 1])
samp.clear()
samp.append(dlib.pair(1, 1))
data.nonrelevant.append(samp)

trainer = dlib.svm_rank_trainer_sparse()
rank = trainer.train(data)
print("Ranking score for a relevant vector:     {}".format(
    rank(data.relevant[0])))
print("Ranking score for a non-relevant vector: {}".format(
    rank(data.nonrelevant[0])))
# Just as before, the output is the following:
#    Ranking score for a relevant vector:     0.5
#    Ranking score for a non-relevant vector: -0.5