def test_set_params(self):
    """Check that ``set_params`` records ``l`` in ``params`` for one instance only."""
    estimator = VW()
    assert 'l' not in estimator.params

    estimator.set_params(l=0.1)
    assert estimator.params['l'] == 0.1

    # a newly constructed model must start without the value set above
    estimator = VW()
    assert 'l' not in estimator.params
def test_set_params(self):
    """Check that ``set_params`` updates the ``l`` attribute and leaves ``vw_`` unset."""
    estimator = VW()
    assert estimator.l is None

    estimator.set_params(l=0.1)
    assert estimator.l == 0.1
    assert estimator.vw_ is None

    # a newly constructed model must start again with ``l`` unset
    estimator = VW()
    assert estimator.l is None
def test_passes(self, data):
    """Check ``passes`` is stored as ``passes_`` and changes the fitted coefficients."""
    n_passes = 2
    estimator = VW(loss_function='logistic', passes=n_passes)
    assert model_passes_match(estimator, n_passes) if False else estimator.passes_ == n_passes

    estimator.fit(data.x, data.y)
    multi_pass_coefs = estimator.get_coefs()

    # a model built without ``passes`` should end up with different weights
    estimator = VW(loss_function='logistic')
    estimator.fit(data.x, data.y)
    default_coefs = estimator.get_coefs()

    assert not np.allclose(multi_pass_coefs.data, default_coefs.data)
def test_save_load(self, data):
    """Round-trip a fitted model through ``save``/``load`` and compare predictions.

    The model file is written inside a throwaway temporary directory that is
    removed even when an assertion fails, so the test does not leave
    ``tmp_sklearn.model`` behind in the current working directory.
    """
    # local imports keep this block self-contained; all three are stdlib
    import os
    import shutil
    import tempfile

    tmp_dir = tempfile.mkdtemp()
    try:
        file_name = os.path.join(tmp_dir, "tmp_sklearn.model")

        model_before = VW(l=100)
        model_before.fit(data.x, data.y)
        before_saving = model_before.predict(data.x)
        model_before.save(file_name)

        # a second, unfitted model gets its state only from the saved file
        model_after = VW(l=100)
        model_after.load(file_name)
        after_loading = model_after.predict(data.x)

        assert np.allclose(before_saving, after_loading)
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
def test_save_load(self, data):
    """Round-trip a fitted model through ``save``/``load`` and compare predictions.

    The model is saved to the path of a ``NamedTemporaryFile``.
    """
    # the context manager closes the temporary file (which removes it, since
    # the default ``delete=True`` is used) even when an assertion below fails
    with NamedTemporaryFile() as f:
        model_before = VW(l=100)
        model_before.fit(data.x, data.y)
        before_saving = model_before.predict(data.x)
        model_before.save(f.name)

        # a second, unfitted model gets its state only from the saved file
        model_after = VW(l=100)
        model_after.load(f.name)
        after_loading = model_after.predict(data.x)

        assert np.allclose(before_saving, after_loading)
def test_get_coefs(self, data):
    """Check the indices of the coefficients returned by ``get_coefs`` after fitting.

    The leftover debugging ``print weights.data`` statement was dropped: it is
    Python 2-only syntax and is not needed by the assertion.
    """
    model = VW()
    model.fit(data.x, data.y)
    weights = model.get_coefs()

    expected_indices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 116060]
    assert np.allclose(weights.indices, expected_indices)
def test_nn(self):
    """Fit with ``nn=3`` on two repeated example strings and check prediction signs."""
    vw = VW(convert_to_vw=False, nn=3)

    positive_example = '1.0 | a b c'
    negative_example = '-1.0 | d e f'
    training_examples = [positive_example] * 10 + [negative_example] * 10
    vw.fit(training_examples)

    # features seen with label 1.0 score above zero, those seen with -1.0 below
    assert vw.predict(['| a b c']) > 0
    assert vw.predict(['| d e f']) < 0
def test_predict(self, data):
    """VWMultiClassifier predictions match a raw VW model built with the same options."""
    options = dict(oaa=2, loss_function='logistic')

    baseline = VW(**options)
    baseline.fit(data.x, data.y)

    classifier = VWMultiClassifier(**options)
    classifier.fit(data.x, data.y)

    expected = baseline.predict(data.x)
    actual = classifier.predict(data.x)
    assert np.allclose(expected, actual)
def test_oaa(self):
    """Fit ``oaa=3`` on five labelled example strings and predict the same labels back."""
    examples = [
        '1 | feature1:2.5',
        '2 | feature1:0.11 feature2:-0.0741',
        '3 | feature3:2.33 feature4:0.8 feature5:-3.1',
        '1 | feature2:-0.028 feature1:4.43',
        '2 | feature5:1.532 feature6:-3.2',
    ]
    expected_labels = [1., 2., 3., 1., 2.]

    classifier = VW(convert_to_vw=False, oaa=3)
    classifier.fit(examples)

    assert np.allclose(classifier.predict(examples), expected_labels)
def test_oaa_probs(self):
    """Fit ``oaa=3`` with logistic loss and predict the training labels back."""
    examples = [
        '1 | feature1:2.5',
        '2 | feature1:0.11 feature2:-0.0741',
        '3 | feature3:2.33 feature4:0.8 feature5:-3.1',
        '1 | feature2:-0.028 feature1:4.43',
        '2 | feature5:1.532 feature6:-3.2',
    ]
    expected_labels = [1., 2., 3., 1., 2.]

    classifier = VW(convert_to_vw=False, oaa=3, loss_function='logistic')
    classifier.fit(examples)

    predicted = classifier.predict(examples)
    assert np.allclose(predicted, expected_labels)
def test_bfgs(self):
    """Build a bfgs model from ``resources/train.dat`` and check its predictions.

    ``fit`` is never called here; presumably the model is trained from the
    ``data`` file at construction time -- confirm against the VW wrapper.
    """
    # the data file is located relative to this test module
    test_dir = os.path.dirname(os.path.realpath(__file__))
    data_file = os.path.join(test_dir, 'resources', 'train.dat')

    model = VW(
        convert_to_vw=False,
        oaa=3,
        passes=30,
        bfgs=True,
        data=data_file,
        cache=True,
        quiet=False,
    )

    examples = [
        '1 | feature1:2.5',
        '2 | feature1:0.11 feature2:-0.0741',
        '3 | feature3:2.33 feature4:0.8 feature5:-3.1',
        '1 | feature2:-0.028 feature1:4.43',
        '2 | feature5:1.532 feature6:-3.2',
    ]
    expected_labels = [1., 2., 3., 1., 2.]

    assert np.allclose(model.predict(examples), expected_labels)
def test_predict(self, data):
    """VWRegressor predictions match a default VW model, also on a repeated call."""
    baseline = VW()
    baseline.fit(data.x, data.y)

    regressor = VWRegressor()
    regressor.fit(data.x, data.y)

    # the second iteration ensures the models can make multiple calls to predict
    for _ in range(2):
        assert np.allclose(baseline.predict(data.x), regressor.predict(data.x))
def test_oaa_probs(self):
    """Fit ``oaa=3`` with ``probabilities=True`` and check the prediction array.

    The prediction is expected to have one row per example and one column per
    class, i.e. shape ``(5, 3)``.
    """
    X = ['1 | feature1:2.5',
         '2 | feature1:0.11 feature2:-0.0741',
         '3 | feature3:2.33 feature4:0.8 feature5:-3.1',
         '1 | feature2:-0.028 feature1:4.43',
         '2 | feature5:1.532 feature6:-3.2']

    model = VW(convert_to_vw=False, oaa=3, loss_function='logistic',
               probabilities=True)
    model.fit(X)

    prediction = model.predict(X)
    # ``ndarray.shape`` is a tuple; comparing it with the list ``[5, 3]``
    # is always False, so the expected shape must be written as a tuple
    assert prediction.shape == (5, 3)
    assert prediction[0, 0] > 0.1
def test_lrq(self):
    """Check ``lrq``/``lrqdropout`` are kept as attributes and the fitted model predicts near 3."""
    examples = [
        '1 |user A |movie 1',
        '2 |user B |movie 2',
        '3 |user C |movie 3',
        '4 |user D |movie 4',
        '5 |user E |movie 1',
    ]

    estimator = VW(
        convert_to_vw=False,
        lrq='um4',
        lrqdropout=True,
        loss_function='quantile',
    )
    assert estimator.lrq == 'um4'
    assert estimator.lrqdropout

    estimator.fit(examples)
    predicted = estimator.predict([' |user C |movie 1'])

    # ``atol=1`` accepts any prediction within one unit of 3
    assert np.allclose(predicted, [3.], atol=1)
def test_decision_function(self, data):
    """VWClassifier labels match raw logistic VW predictions thresholded at zero.

    Raw predictions above 0 map to class ``1.`` and all others to ``-1.``;
    the result must equal ``VWClassifier().predict``.
    """
    classes = np.array([-1., 1.])

    raw_model = VW(loss_function='logistic')
    raw_model.fit(data.x, data.y)
    predictions = raw_model.predict(data.x)

    # builtin ``int`` replaces the ``np.int`` alias, which newer NumPy
    # releases no longer provide; the resulting dtype is the same
    class_indices = (predictions > 0).astype(int)
    class_predictions = classes[class_indices]

    model = VWClassifier()
    model.fit(data.x, data.y)

    assert np.allclose(class_predictions, model.predict(data.x))
def test_predict_proba(self, data):
    """``VWMultiClassifier.predict_proba`` matches a raw VW model built with ``probabilities=True``."""
    baseline = VW(probabilities=True, oaa=2, loss_function='logistic')
    baseline.fit(data.x, data.y)

    classifier = VWMultiClassifier(oaa=2, loss_function='logistic')
    classifier.fit(data.x, data.y)

    # the second iteration ensures the models can make multiple calls to predict
    for _ in range(2):
        assert np.allclose(baseline.predict(data.x),
                           classifier.predict_proba(data.x))
def test_repr():
    """Check the exact ``repr`` text of VW, VWClassifier and VWRegressor instances."""
    estimator = VW()
    wanted = "VW('convert_labels:False', 'quiet:True', 'sgd:False')"
    assert wanted == repr(estimator)

    estimator = VWClassifier()
    wanted = (
        "VWClassifier('convert_labels:False', "
        "'loss_function:logistic', 'quiet:True', 'sgd:False')"
    )
    assert wanted == repr(estimator)

    estimator = VWRegressor()
    wanted = "VWRegressor('convert_labels:False', 'quiet:True', 'sgd:False')"
    assert wanted == repr(estimator)

    # explicitly passed options show up in the expected text; note that
    # ``convert_to_vw`` is passed but does not appear in it
    estimator = VW(
        convert_to_vw=False,
        oaa=3,
        loss_function='logistic',
        probabilities=True,
    )
    wanted = (
        "VW('convert_labels:False', 'loss_function:logistic', "
        "'oaa:3', 'probabilities:True', 'quiet:True', 'sgd:False')"
    )
    assert wanted == repr(estimator)
def test_bfgs_no_data(self):
    """Constructing a bfgs, multi-pass model without a ``data`` argument raises RuntimeError."""
    options = dict(convert_to_vw=False, oaa=3, passes=30, bfgs=True)
    with pytest.raises(RuntimeError):
        VW(**options)
def test_predict_no_convert(self):
    """Fit on two pre-formatted example strings and check the prediction for ``'| good'``."""
    training_examples = ['-1 | bad', '1 | good']

    estimator = VW(loss_function='logistic', convert_to_vw=False)
    estimator.fit(training_examples)

    first_prediction = estimator.predict(['| good'])[0]
    assert np.isclose(first_prediction, 0.245515)
def test_predict(self, data):
    """Fit a logistic model and check the first prediction against a fixed value."""
    estimator = VW(loss_function='logistic')
    estimator.fit(data.x, data.y)

    # NOTE(review): the second ``[:1]`` looks redundant -- confirm before removing
    first_row = data.x[:1][:1]
    first_prediction = estimator.predict(first_row)[0]

    assert np.isclose(first_prediction, 0.406929)
def test_predict_not_fit(self, data):
    """Calling ``predict`` on a model that was never fitted raises ValueError."""
    unfitted = VW(loss_function='logistic')
    first_sample = data.x[0]

    with pytest.raises(ValueError):
        unfitted.predict(first_sample)
def test_del(self, data):
    """Fit a default model and delete it; the test passes if neither step raises."""
    fitted = VW()
    fitted.fit(data.x, data.y)
    del fitted
def test_fit(self, data):
    """``fit_`` is absent before fitting and truthy afterwards."""
    estimator = VW(loss_function='logistic')
    assert not hasattr(estimator, 'fit_')

    estimator.fit(data.x, data.y)
    assert estimator.fit_
def test_init(self):
    """A bare ``VW()`` call yields a ``VW`` instance."""
    instance = VW()
    assert isinstance(instance, VW)
def test_delete(self):
    """Deleting a freshly constructed, unfitted model must not raise."""
    unfitted = VW()
    del unfitted
def test_fit(self, data):
    """``vw_`` is ``None`` before fitting and set afterwards."""
    estimator = VW(loss_function='logistic')
    assert estimator.vw_ is None

    estimator.fit(data.x, data.y)
    assert estimator.vw_ is not None
def test_predict(self, data):
    """Fit a logistic model and check the first prediction to within ``1e-4``."""
    estimator = VW(loss_function='logistic')
    estimator.fit(data.x, data.y)

    # NOTE(review): the second ``[:1]`` looks redundant -- confirm before removing
    first_row = data.x[:1][:1]
    predictions = estimator.predict(first_row)

    assert np.isclose(predictions[0], 0.406929, atol=1e-4)
def test_get_intercept(self, data):
    """``get_intercept`` returns a ``float`` for a fitted default model."""
    estimator = VW()
    estimator.fit(data.x, data.y)

    assert isinstance(estimator.get_intercept(), float)