def arima_ts(df):
    """Fit an ARIMA(0, 1, 0) model on a training window and score its forecast.

    Rows dated 2013-01-01 through 2014-11-01 (both inclusive) are used for
    fitting. Rows dated after 2014-11-01 up to and including 2015-05-01 are
    held out, so the two windows never share a row. Both windows are sorted
    by ``date`` before being collected, because the model and the forecast
    alignment depend on chronological order.

    As a side effect two pandas plots are drawn: the held-out window with
    the prediction, and the full data set with the prediction.

    Args:
        df: Spark DataFrame providing at least a ``date`` and a ``sales``
            column.

    Returns:
        The ``r2_score`` of the held-out ``sales`` values against the
        forecast for the same rows.
    """
    sc = SparkContext.getOrCreate()

    date = df['date']
    # `between` is inclusive on both ends; the test window therefore uses a
    # strict lower bound so 2014-11-01 is not in both train and test.
    train = df.filter(date.between('2013-01-01', '2014-11-01')).orderBy('date')
    test = df.filter((date > '2014-11-01') & (date <= '2015-05-01')).orderBy('date')

    tr = numpy.array(train.select("sales").collect()).flatten()
    te = numpy.array(test.select("sales").collect()).flatten()
    nte = len(te)

    # Orders are fixed at (p=0, d=1, q=0).
    model = fit_model(p=0, d=1, q=0, ts=Vectors.dense(tr), sc=sc)
    prev = model.forecast(Vectors.dense(tr), nte)

    # Only the entries past the training length are kept; presumably the
    # leading len(tr) entries correspond to the training period -- confirm
    # against the forecast API.
    forecast = _java2py(sc, prev)[len(tr):]

    test_pdf = test.toPandas().set_index('date')
    full_pdf = df.orderBy('date').toPandas().set_index('date')

    prediction = pd.DataFrame(forecast, index=test_pdf.index, columns=['prediction'])

    pd.concat([test_pdf, prediction], axis=1).plot()
    pd.concat([full_pdf, prediction], axis=1).plot()

    return r2_score(te, prediction)
 def test_compare_with_r_with_userparams(self):
     """
     Fit with user-supplied initial parameters and check the AR and MA terms.

     The series is loaded from 'resources/R_ARIMA_DataSet1.csv' and fitted
     with orders (1, 0, 1). The second and third fitted coefficients must be
     within 0.01 of the fixed reference values (presumably obtained from R,
     going by the test name).
     """
     series = data_file_as_nparray('resources/R_ARIMA_DataSet1.csv')
     initial_guess = [0.0, 0.2, 1.0]
     fitted = fit_model(1, 0, 1, series, userInitParams=initial_guess, sc=self.sc)
     # The first coefficient is unpacked but not checked.
     _c, ar_coef, ma_coef = fitted.coefficients
     for estimate, reference in ((ar_coef, 0.55), (ma_coef, 1.03)):
         self.assertAlmostEqual(estimate, reference, delta=0.01)
Beispiel #3
0
 def test_remodel_sample_data(self):
     """
     Refitting data sampled from a known model should recover a similar model.

     A model with orders (2, 1, 2) and fixed coefficients generates 1000
     samples. A new model fitted on those samples must match the first
     coefficient within 1 and each remaining coefficient within 0.1.
     """
     reference = ARIMAModel(2, 1, 2, [8.2, 0.2, 0.5, 0.3, 0.1], sc=self.sc)
     refit = fit_model(2, 1, 2, reference.sample(1000), sc=self.sc)

     # Explicit unpacking keeps the check that both models expose exactly
     # five coefficients.
     c, ar1, ar2, ma1, ma2 = reference.coefficients
     c_new, ar1_new, ar2_new, ma1_new, ma2_new = refit.coefficients

     comparisons = (
         (c, c_new, 1),
         (ar1, ar1_new, 0.1),
         (ar2, ar2_new, 0.1),
         (ma1, ma1_new, 0.1),
         (ma2, ma2_new, 0.1),
     )
     for expected, actual, tolerance in comparisons:
         self.assertAlmostEqual(expected, actual, delta=tolerance)
Beispiel #4
0
 def test_compare_with_r_with_userparams(self):
     """
     Check the fitted AR and MA terms when initial parameters are supplied.

     Uses the series in 'resources/R_ARIMA_DataSet1.csv' with orders
     (1, 0, 1) and the starting parameters [0.0, 0.2, 1.0].
     """
     observations = data_file_as_nparray('resources/R_ARIMA_DataSet1.csv')
     fitted_model = fit_model(
         1,
         0,
         1,
         observations,
         userInitParams=[0.0, 0.2, 1.0],
         sc=self.sc,
     )
     coefficients = fitted_model.coefficients
     # Unpacking enforces that exactly three coefficients are returned; the
     # first one is not asserted on.
     _first, ar_term, ma_term = coefficients
     tolerance = 0.01
     self.assertAlmostEqual(ar_term, 0.55, delta=tolerance)
     self.assertAlmostEqual(ma_term, 1.03, delta=tolerance)
Beispiel #5
0
 def test_compare_with_r(self):
     """
     Fit with default initial parameters and check the AR and MA terms.

     The series from 'resources/R_ARIMA_DataSet1.csv' is fitted with orders
     (1, 0, 1). The second and third coefficients must be within 0.01 of
     0.3 and 0.7 respectively (reference values presumably from R, going by
     the test name).
     """
     series = data_file_as_nparray('resources/R_ARIMA_DataSet1.csv')
     fitted = fit_model(1, 0, 1, series, sc=self.sc)
     # The first coefficient is unpacked only to pin the tuple length.
     _c, ar_estimate, ma_estimate = fitted.coefficients
     expectations = ((ar_estimate, 0.3), (ma_estimate, 0.7))
     for estimate, target in expectations:
         self.assertAlmostEqual(estimate, target, delta=0.01)