def createModel(data):
    """Forecast 60 steps ahead for every row of *data* with R's auto.arima.

    Each row yielded by ``DataFrame.iter_row(data)`` is converted to an R
    numeric vector, wrapped in an R ``ts`` object (``start=2``,
    ``frequency=15``) and passed to an R helper that fits
    ``forecast::auto.arima`` and returns the mean of a 60-step forecast.
    The forecasts are gathered into a frame with columns ``Ret_121_pred``
    through ``Ret_180_pred`` (one row per input row, indexed 0..n-1), which
    is printed and written to ``tmp1.csv``.

    NOTE(review): a second ``createModel(data, param)`` is defined later in
    this file; if both live in the same module, that later definition
    replaces this one at import time.

    Args:
        data: Object accepted by ``DataFrame.iter_row``; presumably one
            numeric time series per row -- confirm against the caller.

    Returns:
        None. The result is only printed and saved to ``tmp1.csv``.
    """
    print('Create_model')
    importr('forecast')
    # Define the R-side helper: best ARIMA fit -> mean of a 60-step forecast.
    robj.r('''
        arima_data <- function(data){
            best_arima = auto.arima(data,trace=F,stepwise=T)
            forecast = forecast.Arima(best_arima,h=60,level=c(99.5))
            output = forecast$mean
            return (output)
        }
    ''')

    # 60 output columns, matching h=60 in the R helper above.
    features_names = ["Ret_%d_pred" % i for i in range(121, 181)]

    # Resolve the R callables once instead of on every iteration.
    as_numeric = robj.r('as.numeric')
    make_ts = robj.r('ts')
    arima_data = robj.r('arima_data')

    forecasts = []
    for count, row in enumerate(DataFrame.iter_row(data), 1):
        if count % 100 == 0:
            print(count)  # progress marker every 100 rows
        series = make_ts(as_numeric(row), start=2, frequency=15)
        forecasts.append(np.array(as_numeric(arima_data(series))))

    # Build the frame once: avoids the quadratic cost of concatenating inside
    # the loop and gives each forecast a distinct 0..n-1 index label (the
    # per-row concat left every row labelled 0).
    predict = pd.DataFrame(forecasts, columns=features_names)

    print(predict)
    predict.to_csv("tmp1.csv")
def createModel(data, param):
    """Forecast each row with auto.arima, derive daily predictions, score them.

    Steps, in order:

    1. Define an R helper that fits ``forecast::auto.arima`` (BIC, seasonal,
       with widened order limits) and returns the mean of a 60-step forecast.
    2. For every row yielded by ``DataFrame.iter_row(data)``, build an R
       ``ts`` (``start=2``, ``frequency=param['frequency']``) and collect the
       forecast into columns ``Ret_121_pred`` .. ``Ret_180_pred``.
    3. Write ``raw.csv`` and ``predict.csv``, join the forecasts with the
       module-level ``raw_data`` on index, attach ``Ret_120_price`` from the
       module-level ``price_train`` and call ``transform_format``.
    4. Compute ``Ret_PlusOne_pred`` / ``Ret_PlusTwo_pred``, write
       ``data.csv``, call ``WMAE_model`` and average the ``error`` column.

    Args:
        data: Object accepted by ``DataFrame.iter_row``; presumably one
            numeric time series per row, in the same row order as
            ``raw_data`` -- confirm against the caller.
        param: Mapping that must contain an integer ``'frequency'`` used as
            the seasonal frequency of the R time series.

    Returns:
        dict: ``{'loss': <float>, 'status': STATUS_OK}``.
        NOTE(review): this looks like a hyperopt objective result -- confirm.
    """
    print('Create_model')
    importr('forecast')
    # R-side helper. The stray empty argument that followed max.order=10 in
    # the auto.arima call has been dropped; R bound it positionally to `d`
    # and fell back to the default, so the fit itself is unchanged.
    robj.r('''
        arima_data <- function(data){
            best_arima = auto.arima(data,trace=F,stepwise=T,max.P=8,max.Q=8,max.p=10,max.q=10,max.order=10,
                                    start.p=1,start.q=0,start.P=1,start.Q=0,seasonal=T,ic=('bic'))
            forecast = forecast.Arima(best_arima,h=60,level=c(99.5),stationary=T)
            output = forecast$mean
            return (output)
        }
    ''')
    frequency = param['frequency']
    print('the frequency is %d' % frequency)

    # 60 output columns, matching h=60 in the R helper above.
    features_names = ["Ret_%d_pred" % i for i in range(121, 181)]

    # Resolve the R callables once instead of on every iteration.
    as_numeric = robj.r('as.numeric')
    make_ts = robj.r('ts')
    arima_data = robj.r('arima_data')

    forecasts = []
    for count, row in enumerate(DataFrame.iter_row(data), 1):
        if count % 100 == 0:
            print(count)  # progress marker every 100 rows
        series = make_ts(as_numeric(row), start=2, frequency=frequency)
        forecasts.append(np.array(as_numeric(arima_data(series))))

    # Build the frame once with a 0..n-1 index. Concatenating one-row frames
    # without ignore_index left every row labelled 0, so the index-aligned
    # join and column assignment below paired every forecast with label 0
    # of raw_data / price_train instead of its own row.
    predict = pd.DataFrame(forecasts, columns=features_names)

    # Persist the intermediate inputs before combining forecast and raw data.
    raw_data.to_csv("raw.csv")
    predict.to_csv("predict.csv")
    merged = predict.join(raw_data, rsuffix='_2')
    merged.to_csv("data.raw.csv")

    merged['Ret_120_price'] = price_train['Ret_120_price']
    # Called for its side effects on `merged`; presumably adds the
    # 'Ret_180_price' column read below -- TODO confirm.
    transform_format(merged)

    ret_1 = merged['Ret_MinusTwo']
    ret_2 = 1 - (1.0 / ((1.0 / (1 - merged['Ret_MinusOne']))
                        * merged['Ret_120_price']
                        * merged['Ret_180_price']))
    # Each prediction is an equal-weight blend of two estimates.
    merged['Ret_PlusOne_pred'] = 0.5 * ret_1 + 0.5 * ret_2
    merged['Ret_PlusTwo_pred'] = 0.5 * ret_2 + 0.5 * merged['Ret_PlusOne_pred']
    merged.to_csv("data.csv")

    # Presumably adds the 'error' column to `merged` in place -- TODO confirm.
    WMAE_model(merged)
    # NOTE(review): hard-coded normaliser; presumably 40000 rows x 62 scored
    # returns per row -- confirm before running on a different data size.
    mase = np.sum(merged['error']) / (40000 * 62)
    print('loss:%f' % mase)
    return {'loss': mase, 'status': STATUS_OK}