def test_sparse_vectorss():
    """Exercise the sparse_vectorss (array of arrays of sparse vectors) type.

    Covers construction, resize/clear, element access through three levels
    of indexing, and pickle round-tripping.
    """
    svss = sparse_vectorss()
    assert len(svss) == 0

    # resize() must create empty inner containers.
    svss.resize(5)
    for inner in svss:
        assert len(inner) == 0
    svss.clear()
    assert len(svss) == 0

    svss.extend([
        sparse_vectors([
            sparse_vector([pair(1, 2), pair(3, 4)]),
            sparse_vector([pair(5, 6), pair(7, 8)]),
        ])
    ])
    assert len(svss) == 1

    # Check every stored (index, value) pair against the expected layout.
    expected = [[(1, 2), (3, 4)], [(5, 6), (7, 8)]]
    for vec_idx, pairs in enumerate(expected):
        for pair_idx, (first, second) in enumerate(pairs):
            assert svss[0][vec_idx][pair_idx].first == first
            assert svss[0][vec_idx][pair_idx].second == second

    # Round-trip through pickle (protocol 2) must preserve equality.
    roundtrip = pickle.loads(pickle.dumps(svss, 2))
    assert roundtrip == svss
def sentence_to_sparse_vectors(sentence):
    """Convert a sentence into one sparse feature vector per word.

    The single feature (index 0) is 1 when the word starts with a capital
    letter; otherwise the feature is absent, which a sparse vector treats
    as 0.
    """
    # Equivalent to dlib.vector([1]): one entry at index 0 with value 1.
    capitalized = dlib.sparse_vector()
    capitalized.append(dlib.pair(0, 1))
    # Left empty on purpose — an empty sparse vector is equivalent to
    # dlib.vector([0]).
    lowercase = dlib.sparse_vector()

    result = dlib.sparse_vectors()
    for word in sentence.split():
        result.append(capitalized if word[0].isupper() else lowercase)
    return result
def sentence_to_sparse_vectors(sentence):
    """Map each word of *sentence* to a sparse capitalization-indicator vector.

    Words beginning with an uppercase letter map to a vector holding the
    single pair (0, 1); all other words map to an empty sparse vector,
    which dlib interprets the same as dlib.vector([0]).
    """
    vecs = dlib.sparse_vectors()
    # has_cap behaves like the dense vector [1].
    has_cap = dlib.sparse_vector()
    has_cap.append(dlib.pair(0, 1))
    # no_cap stays empty, i.e. the dense vector [0].
    no_cap = dlib.sparse_vector()
    for token in sentence.split():
        if token[0].isupper():
            vecs.append(has_cap)
        else:
            vecs.append(no_cap)
    return vecs
def training_data():
    """Generate a small, reproducible two-class training set.

    Produces 30 pairs of examples (60 total), alternating labels -1 and +1.
    Each example has 3 features drawn uniformly from [0, 1) and shifted by
    label * 0.5, so the two classes form separable clusters.  Uses a fixed
    random seed so every call returns identical data.

    Returns:
        (predictors, sparse_predictors, response) where predictors holds the
        dense vectors, sparse_predictors holds the same data as sparse
        vectors, and response holds the corresponding +/-1 labels.
    """
    rng = Random(0)  # fixed seed for deterministic test data
    predictors = vectors()
    sparse_predictors = sparse_vectors()
    response = array()
    # BUG FIX: the original reused the name `i` for both the outer repetition
    # loop and the inner enumerate() loop, shadowing the outer variable.  The
    # outer index was never read, so it is now `_`, and the inner index is
    # `idx` — same behavior, no shadowing.
    for _ in range(30):
        for label in [-1, 1]:
            response.append(label)
            values = [rng.random() + label * 0.5 for _ in range(3)]
            predictors.append(vector(values))
            # Mirror the dense vector as an equivalent sparse vector.
            sp = sparse_vector()
            for idx, v in enumerate(values):
                sp.append(pair(idx, v))
            sparse_predictors.append(sp)
    return predictors, sparse_predictors, response
def test_sparse_vector():
    """Verify make_sparse_vector() sorts by index and merges duplicates."""
    sv = sparse_vector()
    for idx, val in [(3, .1), (3, .2), (2, .3), (1, .4)]:
        sv.append(pair(idx, val))
    assert len(sv) == 4

    # make_sparse_vector() sorts entries by index and sums values that
    # share an index, so the two index-3 entries collapse into one.
    make_sparse_vector(sv)
    assert len(sv) == 3
    assert sv[0].first == 1
    assert sv[0].second == .4
    assert sv[1].first == 2
    assert sv[1].second == .3
    assert sv[2].first == 3
    # .1 + .2 is not exactly .3 in binary floating point, hence approx().
    assert sv[2].second == approx(.3)

    body = "1: 0.4\n2: 0.3\n3: 0.3"
    assert str(sv) == body
    assert repr(sv) == "< dlib.sparse_vector containing: \n" + body + " >"
# cross_validate_ranking_trainer(). This performs cross-validation by splitting
# the queries up into folds. That is, it lets the trainer train on a subset of
# ranking_pair instances and tests on the rest. It does this over 4 different
# splits and returns the overall ranking accuracy based on the held out data.
# Just like test_ranking_function(), it reports both the ordering accuracy and
# mean average precision.
# BUG FIX: this used the Python 2 `print` statement, which is a syntax error
# under Python 3.  The print() function below produces the same
# space-separated output on both interpreters.
print("cross validation results: ",
      dlib.cross_validate_ranking_trainer(trainer, queries, 4))

# Finally, note that the ranking tools also support the use of sparse vectors in
# addition to dense vectors (which we used above). So if we wanted to do
# exactly what we did in the first part of the example program above but using
# sparse vectors we would do it like so:
data = dlib.sparse_ranking_pair()
samp = dlib.sparse_vector()

# Make samp represent the same vector as dlib.vector([1, 0]). In dlib, a sparse
# vector is just an array of pair objects. Each pair stores an index and a
# value. Moreover, the svm-ranking tools require sparse vectors to be sorted
# and to have unique indices. This means that the indices are listed in
# increasing order and no index value shows up more than once. If necessary,
# you can use the dlib.make_sparse_vector() routine to make a sparse vector
# object properly sorted and contain unique indices.
samp.append(dlib.pair(0, 1))
data.relevant.append(samp)

# Now make samp represent the same vector as dlib.vector([0, 1])
samp.clear()
samp.append(dlib.pair(1, 1))
data.nonrelevant.append(samp)
# cross_validate_ranking_trainer(). This performs cross-validation by splitting # the queries up into folds. That is, it lets the trainer train on a subset of # ranking_pair instances and tests on the rest. It does this over 4 different # splits and returns the overall ranking accuracy based on the held out data. # Just like test_ranking_function(), it reports both the ordering accuracy and # mean average precision. print("Cross validation results: {}".format( dlib.cross_validate_ranking_trainer(trainer, queries, 4))) # Finally, note that the ranking tools also support the use of sparse vectors in # addition to dense vectors (which we used above). So if we wanted to do # exactly what we did in the first part of the example program above but using # sparse vectors we would do it like so: data = dlib.sparse_ranking_pair() samp = dlib.sparse_vector() # Make samp represent the same vector as dlib.vector([1, 0]). In dlib, a sparse # vector is just an array of pair objects. Each pair stores an index and a # value. Moreover, the svm-ranking tools require sparse vectors to be sorted # and to have unique indices. This means that the indices are listed in # increasing order and no index value shows up more than once. If necessary, # you can use the dlib.make_sparse_vector() routine to make a sparse vector # object properly sorted and contain unique indices. samp.append(dlib.pair(0, 1)) data.relevant.append(samp) # Now make samp represent the same vector as dlib.vector([0, 1]) samp.clear() samp.append(dlib.pair(1, 1)) data.nonrelevant.append(samp)