def test_convergence(self):
    """Verify that the streamed weights drift toward the true coefficient on toy data."""
    # 20 synthetic batches of 100 points each, seeded differently per batch.
    batches = [
        self.sc.parallelize(self.generateLogisticInput(0, 1.5, 100, 42 + i))
        for i in range(20)]
    stream = self.ssc.queueStream(batches)

    captured_weights = []
    classifier = StreamingLogisticRegressionWithSGD(
        stepSize=0.2, numIterations=25)
    classifier.setInitialWeights([0.0])
    classifier.trainOn(stream)
    # Snapshot the first weight after every batch so convergence can be traced.
    stream.foreachRDD(
        lambda x: captured_weights.append(classifier.latestModel().weights[0]))
    self.ssc.start()

    def all_batches_processed():
        self.assertEqual(len(captured_weights), len(batches))
        return True

    # We want all batches to finish for this test.
    self._eventually(all_batches_processed, 60.0, catch_assertions=True)

    trace = array(captured_weights)
    deltas = trace[1:] - trace[:-1]
    # Weights may only regress within a small tolerance ...
    self.assertTrue(all(deltas >= -0.1))
    # ... and must strictly improve on more than one step.
    self.assertTrue(array_sum(deltas > 0) > 1)
def test_convergence(self):
    """Check that streamed weights move toward the true coefficient on toy data."""
    batches = [
        self.sc.parallelize(self.generateLogisticInput(
            0, 1.5, 100, 42 + i))
        for i in range(20)
    ]
    stream = self.ssc.queueStream(batches)

    weight_history = []
    model = StreamingLogisticRegressionWithSGD(stepSize=0.2, numIterations=25)
    model.setInitialWeights([0.0])
    model.trainOn(stream)
    # Record the leading weight after each processed batch.
    stream.foreachRDD(
        lambda x: weight_history.append(model.latestModel().weights[0]))
    self.ssc.start()

    def every_batch_done():
        self.assertEqual(len(weight_history), len(batches))
        return True

    # We want all batches to finish for this test.
    eventually(every_batch_done, 60.0, catch_assertions=True)

    history = array(weight_history)
    steps = history[1:] - history[:-1]
    # Weights may only regress within a small tolerance ...
    self.assertTrue(all(steps >= -0.1))
    # ... and must strictly improve more than once.
    self.assertTrue(array_sum(steps > 0) > 1)
def test_convergence(self):
    """Check that the streamed weights approach the true coefficient on toy data."""
    batches = [
        self.sc.parallelize(self.generateLogisticInput(
            0, 1.5, 100, 42 + i))
        for i in range(20)
    ]
    stream = self.ssc.queueStream(batches)

    recorded = []
    learner = StreamingLogisticRegressionWithSGD(stepSize=0.2, numIterations=25)
    learner.setInitialWeights([0.0])
    learner.trainOn(stream)
    # Keep the first weight after every batch so the trend can be inspected.
    stream.foreachRDD(
        lambda x: recorded.append(learner.latestModel().weights[0]))

    start_time = time()
    self.ssc.start()
    # Wait up to 15 s for the stream to drain, polling every 0.01 s.
    self._ssc_wait(start_time, 15.0, 0.01)

    trend = array(recorded)
    step_diffs = trend[1:] - trend[:-1]
    # Weights should improve over time, allowing a small tolerance for regression.
    self.assertTrue(all(step_diffs >= -0.1))
    self.assertTrue(array_sum(step_diffs > 0) > 1)
def test_convergence(self):
    """Assert that the online weight estimates trend toward the true value."""
    # Build 20 distinct toy batches (100 samples each, per-batch seed).
    toy_batches = [
        self.sc.parallelize(self.generateLogisticInput(0, 1.5, 100, 42 + i))
        for i in range(20)]
    toy_stream = self.ssc.queueStream(toy_batches)

    observed = []
    sgd_model = StreamingLogisticRegressionWithSGD(
        stepSize=0.2, numIterations=25)
    sgd_model.setInitialWeights([0.0])
    sgd_model.trainOn(toy_stream)
    # Capture the first weight component once per batch.
    toy_stream.foreachRDD(
        lambda x: observed.append(sgd_model.latestModel().weights[0]))

    begun = time()
    self.ssc.start()
    # Allow up to 15 s of processing, polled at 0.01 s intervals.
    self._ssc_wait(begun, 15.0, 0.01)

    series = array(observed)
    increments = series[1:] - series[:-1]
    # Weights should improve over time, within a small regression tolerance.
    self.assertTrue(all(increments >= -0.1))
    self.assertTrue(array_sum(increments > 0) > 1)
def test_parameter_accuracy(self):
    """Check that the final learned weight lands near the true value of 1.5."""
    batches = [
        self.sc.parallelize(self.generateLogisticInput(0, 1.5, 100, 42 + i))
        for i in range(20)]
    stream = self.ssc.queueStream(batches)

    learner = StreamingLogisticRegressionWithSGD(
        stepSize=0.2, numIterations=25)
    learner.setInitialWeights([0.0])
    learner.trainOn(stream)

    begun = time()
    self.ssc.start()
    # Give the stream up to 20 s to drain (polling every 0.01 s).
    self._ssc_wait(begun, 20.0, 0.01)

    # Relative error of the learned coefficient against the true value 1.5.
    relative_error = (1.5 - learner.latestModel().weights.array[0]) / 1.5
    self.assertAlmostEqual(relative_error, 0.1, 1)
def test_parameter_accuracy(self):
    """Verify the converged weight is close to the generating coefficient (1.5)."""
    data_batches = [
        self.sc.parallelize(self.generateLogisticInput(
            0, 1.5, 100, 42 + i))
        for i in range(20)
    ]
    data_stream = self.ssc.queueStream(data_batches)

    sgd = StreamingLogisticRegressionWithSGD(stepSize=0.2, numIterations=25)
    sgd.setInitialWeights([0.0])
    sgd.trainOn(data_stream)

    started_at = time()
    self.ssc.start()
    # Wait up to 20 s for processing, polling every 0.01 s.
    self._ssc_wait(started_at, 20.0, 0.01)

    # Compare the learned coefficient to the true value via relative error.
    rel_err = (1.5 - sgd.latestModel().weights.array[0]) / 1.5
    self.assertAlmostEqual(rel_err, 0.1, 1)
# Restore persisted weights for the remaining pairwise classifiers.
# NOTE(review): cPickle.load on gzip'd files — this assumes the model files
# are trusted local artifacts; never unpickle untrusted data.
for pair_name, pair_clf in [
        ('sports_ent', model_sports_ent),
        ('sports_crime', model_sports_crime),
        ('tech_ent', model_tech_ent),
        ('tech_crime', model_tech_crime),
        ('ent_crime', model_ent_crime)]:
    with gzip.open(model_path + pair_name + '.pkl.gz', 'rb') as g:
        model = cPickle.load(g)
    pair_clf.setInitialWeights(model.weights)

print(model_ent_crime.latestModel().weights)

# Attach each pairwise DStream to its classifier; each record's first
# element is fed to trainOn (presumably the labeled point — verify against
# the upstream mapping that builds these streams).
for pair_stream, pair_clf in [
        (pol_fin, model_pol_fin),
        (pol_sports, model_pol_sports),
        (pol_tech, model_pol_tech),
        (pol_ent, model_pol_ent),
        (pol_crime, model_pol_crime),
        (fin_sports, model_fin_sports),
        (fin_tech, model_fin_tech),
        (fin_ent, model_fin_ent),
        (fin_crime, model_fin_crime),
        (sports_tech, model_sports_tech),
        (sports_ent, model_sports_ent),
        (sports_crime, model_sports_crime),
        (tech_ent, model_tech_ent),
        (tech_crime, model_tech_crime)]:
    pair_clf.trainOn(pair_stream.map(lambda x: x[0]))