def reload_and_compare(ovr, suffix): model = ovr.fit(df) ovrPath = temp_path + "/{}".format(suffix) ovr.save(ovrPath) loadedOvr = OneVsRest.load(ovrPath) self._compare_pipelines(ovr, loadedOvr) modelPath = temp_path + "/{}Model".format(suffix) model.save(modelPath) loadedModel = OneVsRestModel.load(modelPath) self._compare_pipelines(model, loadedModel)
def test_onevsrest(self): temp_path = tempfile.mkdtemp() df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)), (1.0, Vectors.sparse(2, [], [])), (2.0, Vectors.dense(0.5, 0.5))] * 10, ["label", "features"]) lr = LogisticRegression(maxIter=5, regParam=0.01) ovr = OneVsRest(classifier=lr) model = ovr.fit(df) ovrPath = temp_path + "/ovr" ovr.save(ovrPath) loadedOvr = OneVsRest.load(ovrPath) self._compare_pipelines(ovr, loadedOvr) modelPath = temp_path + "/ovrModel" model.save(modelPath) loadedModel = OneVsRestModel.load(modelPath) self._compare_pipelines(model, loadedModel)
def test_save_load(self): temp_path = tempfile.mkdtemp() sqlContext = SQLContext(self.sc) df = sqlContext.createDataFrame( [(0.0, Vectors.dense(1.0, 0.8)), (1.0, Vectors.sparse(2, [], [])), (2.0, Vectors.dense(0.5, 0.5))], ["label", "features"], ) lr = LogisticRegression(maxIter=5, regParam=0.01) ovr = OneVsRest(classifier=lr) model = ovr.fit(df) ovrPath = temp_path + "/ovr" ovr.save(ovrPath) loadedOvr = OneVsRest.load(ovrPath) self.assertEqual(loadedOvr.getFeaturesCol(), ovr.getFeaturesCol()) self.assertEqual(loadedOvr.getLabelCol(), ovr.getLabelCol()) self.assertEqual(loadedOvr.getClassifier().uid, ovr.getClassifier().uid) modelPath = temp_path + "/ovrModel" model.save(modelPath) loadedModel = OneVsRestModel.load(modelPath) for m, n in zip(model.models, loadedModel.models): self.assertEqual(m.uid, n.uid)
def test_save_load(self): temp_path = tempfile.mkdtemp() sqlContext = SQLContext(self.sc) df = sqlContext.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)), (1.0, Vectors.sparse(2, [], [])), (2.0, Vectors.dense(0.5, 0.5))], ["label", "features"]) lr = LogisticRegression(maxIter=5, regParam=0.01) ovr = OneVsRest(classifier=lr) model = ovr.fit(df) ovrPath = temp_path + "/ovr" ovr.save(ovrPath) loadedOvr = OneVsRest.load(ovrPath) self.assertEqual(loadedOvr.getFeaturesCol(), ovr.getFeaturesCol()) self.assertEqual(loadedOvr.getLabelCol(), ovr.getLabelCol()) self.assertEqual(loadedOvr.getClassifier().uid, ovr.getClassifier().uid) modelPath = temp_path + "/ovrModel" model.save(modelPath) loadedModel = OneVsRestModel.load(modelPath) for m, n in zip(model.models, loadedModel.models): self.assertEqual(m.uid, n.uid)