def test_serialize(self):
    """Check vector conversion and the serialize/deserialize round trip.

    Covers three input kinds: a SparseVector, a dense array and a plain
    Python list holding the same values as the dense array.
    """
    sv = SparseVector(4, {1: 1, 3: 2})
    dv = array([1., 2., 3., 4.])
    lst = [1, 2, 3, 4]

    # Inputs that are already vectors must be handed back as the very
    # same object; a list is converted to an array with equal contents.
    self.assertTrue(sv is _convert_vector(sv))
    self.assertTrue(dv is _convert_vector(dv))
    self.assertTrue(array_equal(dv, _convert_vector(lst)))

    # Serializing and then deserializing must preserve the values.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        sv, _deserialize_double_vector(_serialize_double_vector(sv)))
    self.assertTrue(array_equal(
        dv, _deserialize_double_vector(_serialize_double_vector(dv))))
    self.assertTrue(array_equal(
        dv, _deserialize_double_vector(_serialize_double_vector(lst))))
def test_serialize(self):
    """Check vector conversion and the serialize/deserialize round trip.

    Covers three input kinds: a SparseVector, a dense array and a plain
    Python list holding the same values as the dense array.
    """
    sv = SparseVector(4, {1: 1, 3: 2})
    dv = array([1., 2., 3., 4.])
    lst = [1, 2, 3, 4]

    # Inputs that are already vectors must be handed back as the very
    # same object; a list is converted to an array with equal contents.
    self.assertTrue(sv is _convert_vector(sv))
    self.assertTrue(dv is _convert_vector(dv))
    self.assertTrue(array_equal(dv, _convert_vector(lst)))

    # Serializing and then deserializing must preserve the values.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        sv, _deserialize_double_vector(_serialize_double_vector(sv)))
    self.assertTrue(array_equal(
        dv, _deserialize_double_vector(_serialize_double_vector(dv))))
    self.assertTrue(array_equal(
        dv, _deserialize_double_vector(_serialize_double_vector(lst))))
def test_serialize(self):
    """Check conversion and serialization of SciPy sparse column vectors.

    A 4x1 LIL matrix with entries at rows 1 and 3 is built, and each
    sparse format derived from it is expected to compare equal to the
    matching SparseVector, both after conversion and after a
    serialize/deserialize round trip.
    """
    # Imported locally, as in the original test, so SciPy is only needed
    # when this test actually runs.
    from scipy.sparse import lil_matrix

    lil = lil_matrix((4, 1))
    lil[1, 0] = 1
    lil[3, 0] = 2
    sv = SparseVector(4, {1: 1, 3: 2})

    # Direct conversion from each sparse format.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(sv, _convert_vector(lil))
    self.assertEqual(sv, _convert_vector(lil.tocsc()))
    self.assertEqual(sv, _convert_vector(lil.tocoo()))
    self.assertEqual(sv, _convert_vector(lil.tocsr()))
    self.assertEqual(sv, _convert_vector(lil.todok()))

    # Serialize/deserialize round trip (COO is not exercised here,
    # matching the original test).
    self.assertEqual(
        sv, _deserialize_double_vector(_serialize_double_vector(lil)))
    self.assertEqual(
        sv,
        _deserialize_double_vector(_serialize_double_vector(lil.tocsc())))
    self.assertEqual(
        sv,
        _deserialize_double_vector(_serialize_double_vector(lil.tocsr())))
    self.assertEqual(
        sv,
        _deserialize_double_vector(_serialize_double_vector(lil.todok())))
def test_serialize(self):
    """Check conversion and serialization of SciPy sparse column vectors.

    A 4x1 LIL matrix with entries at rows 1 and 3 is built, and each
    sparse format derived from it is expected to compare equal to the
    matching SparseVector, both after conversion and after a
    serialize/deserialize round trip.
    """
    # Imported locally, as in the original test, so SciPy is only needed
    # when this test actually runs.
    from scipy.sparse import lil_matrix

    lil = lil_matrix((4, 1))
    lil[1, 0] = 1
    lil[3, 0] = 2
    sv = SparseVector(4, {1: 1, 3: 2})

    # Direct conversion from each sparse format.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(sv, _convert_vector(lil))
    self.assertEqual(sv, _convert_vector(lil.tocsc()))
    self.assertEqual(sv, _convert_vector(lil.tocoo()))
    self.assertEqual(sv, _convert_vector(lil.tocsr()))
    self.assertEqual(sv, _convert_vector(lil.todok()))

    # Serialize/deserialize round trip (COO is not exercised here,
    # matching the original test).
    self.assertEqual(
        sv, _deserialize_double_vector(_serialize_double_vector(lil)))
    self.assertEqual(
        sv,
        _deserialize_double_vector(_serialize_double_vector(lil.tocsc())))
    self.assertEqual(
        sv,
        _deserialize_double_vector(_serialize_double_vector(lil.tocsr())))
    self.assertEqual(
        sv,
        _deserialize_double_vector(_serialize_double_vector(lil.todok())))
def predict(self, x):
    """
    Predict the label for a single example or for every example in an RDD.

    :param x:  A single data point (feature vector), or an RDD whose
               elements are data points (feature vectors).
    :return:   For an RDD input, an RDD of the deserialized predictions
               (an empty RDD when the input holds no elements).  For a
               single data point, whatever the JVM-side
               predictDecisionTreeModel call returns.
    """
    pythonAPI = self._sc._jvm.PythonMLLibAPI()

    if not isinstance(x, RDD):
        # Anything that is not an RDD is treated as one data point.
        serialized_point = _serialize_double_vector(x)
        return pythonAPI.predictDecisionTreeModel(self._java_model,
                                                  serialized_point)

    # Bulk prediction.  An empty input short-circuits to an empty RDD
    # before the JVM predictor is called.
    if x.count() == 0:
        return self._sc.parallelize([])

    serialized_points = _get_unmangled_double_vector_rdd(x, cache=False)
    java_predictions = pythonAPI.predictDecisionTreeModel(
        self._java_model, serialized_points._jrdd)
    raw_predictions = RDD(java_predictions, self._sc, NoOpSerializer())
    return raw_predictions.map(
        lambda raw: _deserialize_double(bytearray(raw)))
def predict(self, x):
    """
    Predict the label for a single example or for every example in an RDD.

    :param x:  A single data point (feature vector), or an RDD whose
               elements are data points (feature vectors).
    :return:   For an RDD input, an RDD of the deserialized predictions
               (an empty RDD when the input holds no elements).  For a
               single data point, whatever the JVM-side
               predictDecisionTreeModel call returns.
    """
    pythonAPI = self._sc._jvm.PythonMLLibAPI()

    if not isinstance(x, RDD):
        # Anything that is not an RDD is treated as one data point.
        serialized_point = _serialize_double_vector(x)
        return pythonAPI.predictDecisionTreeModel(self._java_model,
                                                  serialized_point)

    # Bulk prediction.  An empty input short-circuits to an empty RDD
    # before the JVM predictor is called.
    if x.count() == 0:
        return self._sc.parallelize([])

    serialized_points = _get_unmangled_double_vector_rdd(x, cache=False)
    java_predictions = pythonAPI.predictDecisionTreeModel(
        self._java_model, serialized_points._jrdd)
    raw_predictions = RDD(java_predictions, self._sc, NoOpSerializer())
    return raw_predictions.map(
        lambda raw: _deserialize_double(bytearray(raw)))
def predict(self, point):
    """
    Predict for a single data point using the wrapped model.

    :param point:  Feature vector to predict for; it is serialized with
                   _serialize_double_vector before being handed to the
                   wrapped model.
    :return:       The result of the wrapped model's predict call.
    """
    payload = _serialize_double_vector(point)
    return self._model.predict(payload)
def predict(self, point):
    """
    Predict for a single data point using the wrapped model.

    :param point:  Feature vector to predict for; it is serialized with
                   _serialize_double_vector before being handed to the
                   wrapped model.
    :return:       The result of the wrapped model's predict call.
    """
    payload = _serialize_double_vector(point)
    return self._model.predict(payload)