def test_convergence(self):
        """
        Check that the trained weight trends upward across toy-data batches.

        Twenty RDDs built from ``generateLogisticInput`` are queued as a
        stream. The callback registered with ``foreachRDD`` records the first
        weight of the latest model each time it fires, and consecutive
        recordings are then compared.
        """
        weight_history = []
        batches = []
        for offset in range(20):
            samples = self.generateLogisticInput(0, 1.5, 100, 42 + offset)
            batches.append(self.sc.parallelize(samples))
        stream = self.ssc.queueStream(batches)

        learner = StreamingLogisticRegressionWithSGD(
            stepSize=0.2, numIterations=25)
        learner.setInitialWeights([0.0])
        learner.trainOn(stream)

        def record_weight(_rdd):
            # The RDD itself is ignored; only the model state is sampled.
            weight_history.append(learner.latestModel().weights[0])

        stream.foreachRDD(record_weight)

        self.ssc.start()

        def all_batches_seen():
            self.assertEqual(len(weight_history), len(batches))
            return True

        # Every queued batch must be processed before the weights are
        # inspected (presumably retried for up to 60 seconds -- see
        # _eventually).
        self._eventually(all_batches_seen, 60.0, catch_assertions=True)

        weights = array(weight_history)
        deltas = weights[1:] - weights[:-1]
        # No step may fall by more than the 0.1 tolerance ...
        self.assertTrue(all(deltas >= -0.1))
        # ... and more than one step has to be a strict increase.
        self.assertTrue(array_sum(deltas > 0) > 1)
    def test_convergence(self):
        """
        Verify that the streamed model's weight improves from batch to batch.

        One weight is recorded per ``foreachRDD`` callback invocation; once
        as many weights as queued batches have been collected, successive
        differences are checked against a small tolerance.
        """
        rdd_queue = [
            self.sc.parallelize(
                self.generateLogisticInput(0, 1.5, 100, 42 + seed_shift))
            for seed_shift in range(20)
        ]
        stream = self.ssc.queueStream(rdd_queue)
        weight_trace = []

        model = StreamingLogisticRegressionWithSGD(
            stepSize=0.2,
            numIterations=25,
        )
        model.setInitialWeights([0.0])
        model.trainOn(stream)

        def snapshot(_rdd):
            # Sample the first weight of whatever model is current.
            weight_trace.append(model.latestModel().weights[0])

        stream.foreachRDD(snapshot)

        self.ssc.start()

        def every_batch_done():
            self.assertEqual(len(weight_trace), len(rdd_queue))
            return True

        # All batches have to finish before the trace is analysed.
        eventually(every_batch_done, 60.0, catch_assertions=True)

        trace = array(weight_trace)
        steps = trace[1:] - trace[:-1]
        # A step may regress by at most 0.1.
        self.assertTrue(all(steps >= -0.1))
        # More than one step must be strictly positive.
        self.assertTrue(array_sum(steps > 0) > 1)
Esempio n. 3
0
    def test_convergence(self):
        """
        Check that successive model weights move upward on toy data.

        The first weight of the latest model is appended on every
        ``foreachRDD`` callback; after the wait, the differences between
        neighbouring entries are examined.
        """
        collected = []
        queued_rdds = []
        for idx in range(20):
            data = self.generateLogisticInput(0, 1.5, 100, 42 + idx)
            queued_rdds.append(self.sc.parallelize(data))
        stream = self.ssc.queueStream(queued_rdds)

        regressor = StreamingLogisticRegressionWithSGD(
            stepSize=0.2, numIterations=25)
        regressor.setInitialWeights([0.0])
        regressor.trainOn(stream)

        def collect_weight(_rdd):
            collected.append(regressor.latestModel().weights[0])

        stream.foreachRDD(collect_weight)

        # The start time is captured before the context starts and handed to
        # the wait helper together with 15.0 and 0.01 (presumably a timeout
        # and a polling interval -- see _ssc_wait).
        started_at = time()
        self.ssc.start()
        self._ssc_wait(started_at, 15.0, 0.01)

        history = array(collected)
        changes = history[1:] - history[:-1]

        # Weights may dip by no more than 0.1 between neighbours,
        self.assertTrue(all(changes >= -0.1))
        # and more than one neighbour pair must show a strict increase.
        self.assertTrue(array_sum(changes > 0) > 1)
Esempio n. 4
0
    def test_convergence(self):
        """
        Ensure the learned weight keeps improving over the queued batches.

        Each ``foreachRDD`` callback stores the first weight of the latest
        model; the stored sequence is then differenced and checked.
        """
        seen_weights = []
        toy_rdds = [
            self.sc.parallelize(
                self.generateLogisticInput(0, 1.5, 100, 42 + n))
            for n in range(20)
        ]
        toy_stream = self.ssc.queueStream(toy_rdds)

        sgd_model = StreamingLogisticRegressionWithSGD(
            stepSize=0.2,
            numIterations=25,
        )
        sgd_model.setInitialWeights([0.0])
        sgd_model.trainOn(toy_stream)
        toy_stream.foreachRDD(
            lambda _rdd: seen_weights.append(
                sgd_model.latestModel().weights[0]))

        # Timestamp first, then start, then wait: the helper receives the
        # pre-start time (15.0 / 0.01 are presumably timeout and poll
        # interval -- see _ssc_wait).
        begin = time()
        self.ssc.start()
        self._ssc_wait(begin, 15.0, 0.01)

        as_array = array(seen_weights)
        later, earlier = as_array[1:], as_array[:-1]
        step = later - earlier

        # Tolerate a drop of at most 0.1 per step,
        self.assertTrue(all(step >= -0.1))
        # while requiring more than one strictly increasing step.
        self.assertTrue(array_sum(step > 0) > 1)
Esempio n. 5
0
    def test_parameter_accuracy(self):
        """
        Check that the final trained weight lands near the desired value.

        After streaming twenty toy batches, the relative gap between 1.5 and
        the first learned weight must equal 0.1 when rounded to one decimal
        place.
        """
        batches = []
        for shift in range(20):
            rows = self.generateLogisticInput(0, 1.5, 100, 42 + shift)
            batches.append(self.sc.parallelize(rows))
        stream = self.ssc.queueStream(batches)

        learner = StreamingLogisticRegressionWithSGD(
            stepSize=0.2, numIterations=25)
        learner.setInitialWeights([0.0])
        learner.trainOn(stream)

        # The pre-start timestamp is passed to the wait helper (20.0 / 0.01
        # are presumably timeout and poll interval -- see _ssc_wait).
        start_time = time()
        self.ssc.start()
        self._ssc_wait(start_time, 20.0, 0.01)

        learned = learner.latestModel().weights.array[0]
        relative_gap = (1.5 - learned) / 1.5
        self.assertAlmostEqual(relative_gap, 0.1, 1)
Esempio n. 6
0
    def test_parameter_accuracy(self):
        """
        Verify the final weight is close to the value the toy data targets.

        The model is trained on a queue of twenty toy batches; afterwards the
        relative difference between 1.5 and the first weight is compared with
        0.1 to one decimal place.
        """
        toy_rdds = [
            self.sc.parallelize(
                self.generateLogisticInput(0, 1.5, 100, 42 + k))
            for k in range(20)
        ]
        toy_stream = self.ssc.queueStream(toy_rdds)

        model = StreamingLogisticRegressionWithSGD(
            stepSize=0.2,
            numIterations=25,
        )
        model.setInitialWeights([0.0])
        model.trainOn(toy_stream)

        # Capture the time before starting so the wait helper measures from
        # there (20.0 / 0.01 presumably timeout and poll interval -- see
        # _ssc_wait).
        t0 = time()
        self.ssc.start()
        self._ssc_wait(t0, 20.0, 0.01)

        final_weight = model.latestModel().weights.array[0]
        shortfall = 1.5 - final_weight
        self.assertAlmostEqual(shortfall / 1.5, 0.1, 1)
        with gzip.open(model_path + 'sports_ent.pkl.gz', 'rb') as g:
            model = cPickle.load(g)
            model_sports_ent.setInitialWeights(model.weights)
        with gzip.open(model_path + 'sports_crime.pkl.gz', 'rb') as g:
            model = cPickle.load(g)
            model_sports_crime.setInitialWeights(model.weights)
        with gzip.open(model_path + 'tech_ent.pkl.gz', 'rb') as g:
            model = cPickle.load(g)
            model_tech_ent.setInitialWeights(model.weights)
        with gzip.open(model_path + 'tech_crime.pkl.gz', 'rb') as g:
            model = cPickle.load(g)
            model_tech_crime.setInitialWeights(model.weights)
        with gzip.open(model_path + 'ent_crime.pkl.gz', 'rb') as g:
            model = cPickle.load(g)
            model_ent_crime.setInitialWeights(model.weights)
    print(model_ent_crime.latestModel().weights)

    model_pol_fin.trainOn(pol_fin.map(lambda x: x[0]))
    model_pol_sports.trainOn(pol_sports.map(lambda x: x[0]))
    model_pol_tech.trainOn(pol_tech.map(lambda x: x[0]))
    model_pol_ent.trainOn(pol_ent.map(lambda x: x[0]))
    model_pol_crime.trainOn(pol_crime.map(lambda x: x[0]))
    model_fin_sports.trainOn(fin_sports.map(lambda x: x[0]))
    model_fin_tech.trainOn(fin_tech.map(lambda x: x[0]))
    model_fin_ent.trainOn(fin_ent.map(lambda x: x[0]))
    model_fin_crime.trainOn(fin_crime.map(lambda x: x[0]))
    model_sports_tech.trainOn(sports_tech.map(lambda x: x[0]))
    model_sports_ent.trainOn(sports_ent.map(lambda x: x[0]))
    model_sports_crime.trainOn(sports_crime.map(lambda x: x[0]))
    model_tech_ent.trainOn(tech_ent.map(lambda x: x[0]))
    model_tech_crime.trainOn(tech_crime.map(lambda x: x[0]))