Esempio n. 1
0
 def test_list_feature(self):
     t = vw.VWClassifier(target="target", num_iterations=10)
     try:
         df = pd.DataFrame.from_dict([{
             "target": "1",
             "df": ["a", "b", "c", "d"]
         }, {
             "target": "2",
             "df": ["x", "y", "z"]
         }])
         t.fit(df)
         df2 = pd.DataFrame.from_dict([{
             "df": ["a", "b", "c", "d"]
         }, {
             "df": ["x", "y", "z"]
         }])
         scores = t.predict_proba(df2)
         if not scores is None:
             print scores
         self.assertEquals(scores.shape[0], 2)
         self.assertTrue(scores[0][0] > scores[0][1])
         self.assertEquals(scores.shape[1], 2)
         self.assertTrue(scores[1][0] < scores[1][1])
     finally:
         t.close()
Esempio n. 2
0
 def test_zero_based_target(self):
     t = vw.VWClassifier(target="target", target_readable="name")
     try:
         df = pd.DataFrame.from_dict([{
             "target": 0,
             "b": "c d",
             "c": 3,
             "name": "zeroTarget"
         }, {
             "target": 1,
             "b": "word2",
             "name": "oneTarget"
         }])
         t.fit(df)
         scores = t.predict_proba(df)
         print "scores->", scores
         self.assertEquals(scores.shape[0], 2)
         self.assertEquals(scores.shape[1], 2)
         idMap = t.get_class_id_map()
         print idMap
         formatted_recs_list = []
         for index, proba in enumerate(scores[0]):
             print index, proba
             if index in idMap:
                 indexName = idMap[index]
             else:
                 indexName = str(index)
             formatted_recs_list.append({
                 "prediction": str(proba),
                 "predictedClass": indexName,
                 "confidence": str(proba)
             })
         print formatted_recs_list
     finally:
         t.close()
Esempio n. 3
0
 def test_vw_same_score_bug(self):
     t = vw.VWClassifier(target="target", num_iterations=10)
     try:
         df = pd.DataFrame.from_dict([{
             "target": "1",
             "df": ["a", "b", "c", "d"]
         }, {
             "target": "2",
             "df": ["x", "y", "z"]
         }])
         t.fit(df)
         df2 = pd.DataFrame.from_dict([{
             "df": ["a", "b", "c", "d"]
         }, {
             "df": ["x", "y", "z"]
         }])
         scores = t.predict_proba(df2)
         score_00 = scores[0][0]
         score_10 = scores[1][0]
         for i in range(1, 4):
             scores = t.predict_proba(df2)
             self.assertEquals(scores[0][0], score_00)
             self.assertEquals(scores[1][0], score_10)
     finally:
         t.close()
Esempio n. 4
0
 def test_sklearn_pipeline(self):
     t = vw.VWClassifier(target="target")
     f1 = {"target": 0, "b": 1.0, "c": 0}
     f2 = {"target": 1, "b": 0, "c": 2.0}
     fs = []
     for i in range(1, 50):
         fs.append(f1)
         fs.append(f2)
     print "features=>", fs
     df = pd.DataFrame.from_dict(fs)
     estimators = [("vw", t)]
     p = Pipeline(estimators)
     print "fitting"
     p.fit(df)
     print "get preds 1 "
     preds = p.predict_proba(df)
     print preds
     print "-------------------"
     t.close()
     # vw sometimes need some more time between invocations. Need to look into this.
     import time
     time.sleep(5)
     joblib.dump(p, "/tmp/pipeline/p")
     p2 = joblib.load("/tmp/pipeline/p")
     print "get preds 2"
     df3 = p2.predict_proba(df)
     print df3
     vw2 = p2._final_estimator
     vw2.close()
Esempio n. 5
0
 def test_numpy_input(self):
     t = vw.VWClassifier()
     try:
         X = np.random.randn(6, 4)
         y = np.array([1, 2, 1, 1, 2, 2])
         t.fit(X, y)
         scores = t.predict_proba(X)
         print scores
     finally:
         t.close()
Esempio n. 6
0
 def test_dict_feature(self):
     t = vw.VWClassifier(target="target")
     try:
         df = pd.DataFrame.from_dict([{"target":"1","df":{"1":0.234,"2":0.1}},{"target":"2","df":{"1":0.5}}])
         t.fit(df)
         scores = t.predict_proba(df)
         print scores
         self.assertEquals(scores.shape[0],2)
         self.assertEquals(scores.shape[1],2)
     finally:
         t.close()
Esempio n. 7
0
 def test_predictions(self):
     t = vw.VWClassifier(target="target")
     try:
         df = pd.DataFrame.from_dict([{"target":"1","b":"c d","c":3},{"target":"2","b":"word2"}])
         t.fit(df)
         preds = t.predict(df)
         print preds
         self.assertEquals(preds[0],0)
         self.assertEquals(preds[1],1)
     finally:
         t.close()
Esempio n. 8
0
 def test_create_features(self):
     t = vw.VWClassifier(target="target")
     try:
         df = pd.DataFrame.from_dict([{"target":"1","b":"c d","c":3},{"target":"2","b":"word2"}])
         t.fit(df)
         scores = t.predict_proba(df)
         print scores
         self.assertEquals(scores.shape[0],2)
         self.assertEquals(scores.shape[1],2)
     finally:
         t.close()
Esempio n. 9
0
 def test_vw_args(self):
     t = vw.VWClassifier(target="target",b=18)
     try:
         f = {}
         f2 = {}
         for i in range(1,5000):
             f[i] = 1
             f2[i] = 0.1
         df = pd.DataFrame.from_dict([{"target":"1","df":f},{"target":"2","df":f2}])
         t.fit(df)
         scores = t.predict_proba(df)
         print scores
         self.assertEquals(scores.shape[0],2)
         self.assertEquals(scores.shape[1],2)
     finally:
         t.close()