def Processa_Historico_Predicao(): tickers = ["INDFUT", "VALE3"] for T in tickers: print("Iniciando... : " + T) #df_Tabela_Predicao = db.queryData("select id, DATA from Predicao WHERE ticker='" + T + "' and DATA > 20190103 order by DATA ASC", True) df_Tabela_Predicao = db.queryData( "select id, DATA from " + tabelaDePlotagem + " WHERE ticker='" + T + "' and real is null order by DATA ASC", True) df_Tabela_Cotacao = db.queryData( "select * from " + T + "_ohlc_d1 order by DATA ASC", True) df_Tabela_Cotacao = df_Tabela_Cotacao.rename(columns={ "data": "Date", "close": "Close" }) df_Tabela_Cotacao["Data"] = pd.to_datetime( df_Tabela_Cotacao["Date"], format='%Y%m%d').dt.normalize() df_Tabela_Cotacao.dropna(axis=0) df_Tabela_Cotacao = df_Tabela_Cotacao.set_index("Data") df_Tabela_Cotacao.drop(["Date"], axis=1, inplace=True) df_Tabela_Cotacao = d_util.returnColumn(df_Tabela_Cotacao, 1, False) df_Tabela_Predicao["data"] = pd.to_datetime( df_Tabela_Predicao["data"], format='%Y%m%d').dt.normalize() df_Tabela_Predicao = df_Tabela_Predicao.set_index("data") df_Tabela_Predicao['data_string'] = df_Tabela_Predicao.index df_Tabela_Cotacao = df_Tabela_Cotacao.dropna(axis=0) for e in df_Tabela_Predicao.iterrows(): data = e[1]["data_string"] id = e[1]["id"] if len(df_Tabela_Cotacao.loc[data:data]) > 0: pips = df_Tabela_Cotacao.loc[data:data]["Pips"][0] Variacao_Retorno = df_Tabela_Cotacao.loc[data:data]["Alvo1"][0] Real = "" if Variacao_Retorno > 0: Real = "COMPRA" else: Real = "VENDA" Variacao_Retorno = str(round(Variacao_Retorno * 100, 2)) SQL = "UPDATE " + tabelaDePlotagem + " SET pontos_real='" + str( pips) + "', real='" + str( Real) + "', variacao_real='" + str( Variacao_Retorno) + "' where id='" + str(id) + "'" db.queryExecute(SQL) print("Concluido... : " + T)
def storeQuote(timestamp, ticker,Open,High,Low,Close): sqlConsulta = "SELECT * FROM " + ticker + " WHERE data = '" + str(timestamp) + "'" df_consulta = db.queryData(sqlConsulta) if len(df_consulta) > 0: str_SQL = "UPDATE " + ticker + " SET data = '" + str(timestamp) + "',open = '" + str(Open) + "',high = '" + str(High) + "',low = '" + str(Low) + "',close = '" + str(Close) + "'" str_UPDT = " WHERE id = '" + str(df_consulta[0][0]) + "'" db.queryExecute(str_SQL + str_UPDT) else: str_SQL = 'INSERT INTO ' + ticker + ' (data,Open,High,Low,Close) VALUES ' string_Insert = " ('" + str(timestamp) + "'," + "'" + str(Open) + "'," + "'" + str(High) + "'," + "'" + str(Low) + "'," + "'" + str(Close) + "')" try: db.queryExecute(str_SQL + string_Insert) except: time.sleep(2) db.queryExecute(str_SQL + string_Insert)
def __init__(self, ticker, data): prediction_data = db.queryData( "SELECT * FROM " + tabelaDePlotagem + " where ticker='" + ticker + "' and DATA='" + data + "' ORDER BY DATA ASC", True) if len(prediction_data) > 0: self.id = int(prediction_data["id"][0]) self.date = str(prediction_data["data"][0]) self.ticker = prediction_data["ticker"][0] self.overall_predict = prediction_data["predict"][0] predicted_collection_info = ast.literal_eval( prediction_data["datainfo"][0]) self.collection = [] for item in predicted_collection_info["modelSets"]: self.collection.append(collectionData(item)) self.collection_short_signal = prediction_data["shortPercent"][0] self.collection_long_signal = prediction_data["longPercent"][0] self.real = prediction_data["real"][0] self.real_variation = prediction_data["variacao_real"][0] self.real_pips = prediction_data["pontos_real"][0] else: self.id = "error"
def dataProcess(): infoData_Train = { 'INDFUT': { 'Data': { 'start_train': "2003-12-02", 'end_train': "2016-12-31" }, 'modelSets': [ [ 'RSL_std15', 'v', 'a', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], ['RSL_std15', 'v', 'a', 'tau', 'Dia_Semana', 'Dia_Mes', 'Mes'], ['RSL_std5', 'm', 'f', 'tau', 'Dia_Semana', 'Dia_Mes', 'Mes'], [ 'v', 'a', 'T', 'Distancia_BBL', 'Distancia_SAR', 'Dia_Semana', 'Dia_Mes' ], [ 'v', 'a', 'cat', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], ['v', 'k', 'tau', 'M', 'Distancia_BBL', 'Dia_Semana', 'Mes'], [ 'v', 'a', 'M', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'RSL_std15', 'v', 'a', 'Distancia_SAR', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'v', 'a', 'cat', 'Distancia_BBL', 'Distancia_SAR', 'Dia_Semana', 'Dia_Mes' ], [ 'RSL_std10', 'v', 'T', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'v', 'a', 'tau', 'Distancia_SAR', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'v', 'tau', 'g', 'Distancia_SAR', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'RSL_std15', 'a', 'cat', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], ['RSL_std5', 'm', 'w', 'tau', 'Dia_Semana', 'Dia_Mes', 'Mes'], [ 'v', 'a', 'tau', 'Distancia_BBL', 'Distancia_SAR', 'Dia_Semana', 'Dia_Mes' ], ['v', 'a', 'T', 'k', 'Distancia_BBL', 'Dia_Semana', 'Mes'], [ 'RSL_std10', 'v', 'M', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'RSL_std15', 'v', 'tau', 'Distancia_SAR', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'v', 'a', 'T', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'RSL_std5', 'a', 'M', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'RSL_std10', 'RSL_std15', 'v', 'a', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'v', 'k', 'tau', 'Distancia_SAR', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'v', 'm', 'cat', 'M', 'Distancia_BBL', 'posicao_sar', 'Dia_Semana' ], [ 'v', 'a', 'T', 'M', 'Distancia_BBL', 'posicao_sar', 'Dia_Semana' ], [ 'RSL_std15', 'f', 'cat', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'RSL_std15', 'cat', 'tau', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], ['v', 'm', 'T', 'cat', 'M', 'Distancia_BBL', 'Dia_Semana'], [ 'RSL_std15', 'm', 'cat', 'tau', 'M', 'Distancia_BBL', 'Dia_Semana' ], [ 'v', 'a', 'k', 'tau', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes' ], [ 'v', 'a', 'tau', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], ['v', 'm', 'T', 'cat', 'tau', 'M', 'Distancia_BBL'], [ 'v', 'cat', 'tau', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], [ 'RSL_std5', 'f', 'g', 'Distancia_BBL', 'Dia_Semana', 'Dia_Mes', 'Mes' ], ] }, 'VALE3': { 'Data': { 'start_train': "2004-01-02", 'end_train': "2017-12-31" }, 'modelSets': [ ['dir_D-2', 'dir_D-3', 'Dia_Mes', 'Mes'], ['RSL_std5', 'RSL_std15', 'posicao_sar', 'Dia_Mes'], ['dir_D-1', 'dir_D-3', 'RSL_std5', 'RSL_std15', 'Dia_Mes'], ['dir_D-1', 'dir_D-2', 'dir_D-3', 'RSL_std15', 'a'], ['dir_D-1', 'RSL_std5', 'RSL_std15', 'Dia_Mes'], ['dir_D-1', 'dir_D-2', 'RSL_std15', 'm', 'posicao_sar'], ['RSL_std10', 'M', 'Dia_Mes', 'Mes'], ['RSL_std15', 'm', 'Distancia_BBL', 'Dia_Mes'], ['tau', 'Distancia_BBL', 'Dia_Semana'], ['dir_D-2', 'RSL_std5', 'v', 'Distancia_BBL'], ['dir_D-2', 'RSL_std5', 'a', 'Distancia_BBL'], ['dir_D-2', 'v', 'k', 'Dia_Mes'], ['dir_D-1', 'dir_D-2', 'w', 'Dia_Mes'], ['dir_D-1', 'dir_D-2', 'f', 'Dia_Mes'], ['a', 'cat', 'Dia_Semana'], ['dir_D-2', 'tau', 'Distancia_BBH', 'Dia_Mes'], ['dir_D-1', 'dir_D-3', 'RSL_std15', 'Dia_Mes'], ['RSL_std10', 'k', 'g', 'Dia_Mes'], ['k', 'Dia_Mes'], ['RSL_std15', 'k', 'M', 'Dia_Mes'], ['a', 'tau', 'Distancia_BBL', 'Dia_Semana'], ['dir_D-1', 'dir_D-2', 'f', 'w', 'Dia_Mes'], ['tau', 'Dia_Semana', 'Dia_Mes', 'Mes'], ['dir_D-1', 'dir_D-2', 'T', 'tau', 'g'], ['a', 'Distancia_BBH', 'Distancia_BBL', 'Distancia_SAR'], ['k', 'Distancia_M7', 'Distancia_BBH', 'Dia_Mes'], ['dir_D-1', 'dir_D-2', 'k', 'Distancia_BBL', 'posicao_sar'], ['dir_D-2', 'tau', 'Dia_Mes'], ['dir_D-2', 'dir_D-3', 'posicao_sar', 'Dia_Mes'], ['RSL_std5', 'RSL_std10', 'w', 'Distancia_BBH'], ['RSL_std5', 'RSL_std10', 'f', 'Distancia_BBH'], ['dir_D-1', 'dir_D-2', 'RSL_std5', 'a', 'Distancia_BBL'], ['dir_D-1', 'dir_D-3', 'RSL_std15', 'm', 'Dia_Mes'], ['dir_D-1', 'k', 'Distancia_BBH', 'Dia_Mes'], ['dir_D-2', 'a', 'k', 'Distancia_BBL'], ['dir_D-1', 'RSL_std15', 'Distancia_M7', 'Distancia_BBL'], ] } } for T in infoData_Train: #T = "VALE3" print("Processing: " + T) start_train = infoData_Train[T]["Data"]["start_train"] end_train = infoData_Train[T]["Data"]["end_train"] modelSets = infoData_Train[T]["modelSets"] df1 = db.queryData("select * from " + T + "_ohlc_d1", True) df1 = df1.rename(columns={"data": "Date", "close": "Close"}) df1["Data"] = pd.to_datetime(df1["Date"], format='%Y%m%d').dt.normalize() df1 = df1.sort_values(['Data'], ascending=True) df1.dropna(axis=0) df1 = df1.set_index("Data") df1.drop(["Date"], axis=1, inplace=True) df1.drop(["id"], axis=1, inplace=True) dfOriginal = df1.copy() dfFiltered = df1.copy() dfFiltered = d_util.returnColumn(df1, 1, False) dfFiltered = d_util.techIndicator(dfFiltered) dfFiltered = d_util.omIndicator(dfFiltered, False) dfFiltered = d_util.physicsIndicator(dfFiltered, 5, 5, False) dfFiltered["Dia_Semana"] = dfFiltered.index.strftime("%w") dfFiltered["Dia_Mes"] = dfFiltered.index.strftime("%d") dfFiltered["Mes"] = dfFiltered.index.strftime("%m") dfFiltered["Alvo_Bin"] = np.where(dfFiltered['Alvo1'] > 0, 1, 0) df1_predict = dfFiltered.tail(1).copy() dfFiltered = dfFiltered.dropna(axis=0) dfFiltered = dfFiltered.dropna(axis=1) lastDate = datetime.datetime.strftime(df1_predict.index[0], "%d/%m/%Y") nextDate = datetime.datetime.strftime( df1_predict.index[0] + datetime.timedelta(days=1), "%d/%m/%Y") startTest = datetime.datetime.strftime( dfFiltered.tail(daysOfTest).index[0], "%Y-%m-%d") endTest = datetime.datetime.strftime( df1_predict.index[0] - datetime.timedelta(days=1), "%Y-%m-%d") d = [ 'algo', 'modelSet', 'currentAccuracy', 'returnResult', 'returnAccum', 'coeffInclination', 'confusionMatrix', 'resultPrediction' ] dfCurrentResult = pd.DataFrame(columns=d) for itm in modelSets: df1_train1 = dfFiltered[start_train:end_train].copy() dfTesting = dfFiltered[startTest:endTest].copy() x_train1 = df1_train1[itm] y_train1 = df1_train1['Alvo_Bin'] x_test1 = dfTesting[itm] y_test1 = dfTesting['Alvo_Bin'] rf1 = RandomForestClassifier(bootstrap=True, criterion='gini', max_depth=10, max_features='auto', min_samples_leaf=1, min_samples_split=2, n_estimators=1500, n_jobs=5, oob_score=True, random_state=42) rf1.fit(x_train1, y_train1) lastDayPrediction = df1_predict[itm] y_pred_test1 = rf1.predict(x_test1) resultPrediction = rf1.predict(lastDayPrediction) confusionMatrix = classification_report(y_test1, y_pred_test1, output_dict=True) currentAccuracy = round( metrics.accuracy_score(y_test1, y_pred_test1) * 100, 3) stop = 100000 dfTesting.loc[:, "Predicted"] = y_pred_test1 dfTesting["Predicted"].astype(str) dfTesting.loc[:, "PipsReturn"] = np.where( dfTesting.loc[:, 'Predicted'] == 1, dfTesting.loc[:, 'Pips'], '0') dfTesting.loc[:, "PipsReturn"] = np.where( dfTesting.loc[:, 'Predicted'] == 0, -1 * dfTesting.loc[:, 'Pips'], dfTesting.loc[:, "PipsReturn"]) dfTesting.loc[:, "PipsReturn"] = dfTesting["PipsReturn"].astype(float) dfTesting.loc[:, "PipsReturn"] = np.where( dfTesting.loc[:, 'PipsReturn'] <= -stop, -stop, dfTesting.loc[:, "PipsReturn"]) dfTesting.loc[:, "PipsReturn_Accum"] = dfTesting["PipsReturn"].cumsum( ) returnResult = str(dfTesting["PipsReturn_Accum"].tail(1)[0]) dfReturn = dfTesting.copy() dfReturn = dfReturn.reset_index() returnAccum = {} for e in dfReturn.iterrows(): data = str(datetime.datetime.strftime(e[1]["Data"], "%Y%m%d")) acc = str(round(e[1]["PipsReturn_Accum"], 2)) returnAccum[data] = acc print(str(itm) + " -- " + str(returnAccum)) dfLinearRegression = dfTesting.copy() y3 = np.asarray(dfLinearRegression['PipsReturn_Accum']).reshape( -1, 1) dfLinearRegression['Datetime'] = pd.to_datetime( dfLinearRegression.index.to_numpy()) #X_test.columns = ["Date"] dfLinearRegression['Datetime'] = pd.to_datetime( dfLinearRegression['Datetime']) dfLinearRegression['Datetime'] = dfLinearRegression[ 'Datetime'].map(dt.datetime.toordinal) x3 = np.asarray(dfLinearRegression['Datetime']) x3 = x3.reshape(-1, 1) model = LinearRegression() #create linear regression object model.fit(x3, y3) #train model on train data model.score(x3, y3) #check score coeffInclination = round(float(model.coef_[0]), 1) Intercept = round(float(model.intercept_[0]), 1) d_frame = { 'algo': 'RANDOMFOREST', 'modelSet': str(list(itm)), 'currentAccuracy': currentAccuracy, 'returnResult': str(returnResult), 'returnAccum': str(returnAccum), 'coeffInclination': str(coeffInclination), 'confusionMatrix': str(confusionMatrix), 'resultPrediction': resultPrediction[0] } new_row = pd.Series(data=d_frame) dfCurrentResult = dfCurrentResult.append(new_row, ignore_index=True) dfCurrentResult['coeffInclination'] = dfCurrentResult[ 'coeffInclination'].astype(float) dfCurrentResult = dfCurrentResult.sort_values(['coeffInclination'], ascending=False) dfTOP5Result = dfCurrentResult.head(topAlgorithmToCollection) dfResult = dfTOP5Result.copy() totalAnalyzed = dfResult["resultPrediction"].count() shortPercent = (dfResult[dfResult["resultPrediction"] == 0] ["resultPrediction"].count() / totalAnalyzed) * 100 longPercent = (dfResult[dfResult["resultPrediction"] == 1] ["resultPrediction"].count() / totalAnalyzed) * 100 position = "" percentual = 0 if longPercent > shortPercent: position = "LONG" percentual = longPercent else: position = "SHORT" percentual = shortPercent infoData = { 'position': position, 'percentual': str(round(percentual, 2)), 'modelSets': [] } for inteLine in dfResult.iterrows(): if str(inteLine[1]["resultPrediction"]) == "1": Posicao = "LONG" else: Posicao = "SHORT" x = { "Item": inteLine[1]["modelSet"], "Position": Posicao, "Accuracy": str(inteLine[1]["currentAccuracy"]), "coeffInclination": str(inteLine[1]["coeffInclination"]), "confusionMatrix": ast.literal_eval(inteLine[1]["confusionMatrix"]), "Return": ast.literal_eval(inteLine[1]["returnAccum"]) } infoData['modelSets'].append(x) infoData = json.dumps(infoData) predictionDate = dt.datetime.strptime(lastDate, "%d/%m/%Y").strftime("%Y%m%d") ExisteData = "SELECT * FROM " + predictTable + " WHERE data = '" + str( predictionDate) + "' and ticker='" + T + "'" df_consulta = db.queryData(ExisteData) if len(df_consulta) > 0: ExisteData = "DELETE FROM " + predictTable + " WHERE data = '" + str( predictionDate) + "' and ticker='" + T + "'" db.queryExecute(ExisteData) stringAdicionaOuAltera = "INSERT INTO " + predictTable + " (data,ticker,predict,datainfo,shortPercent,longPercent) VALUES (?,?,?,?,?,?)" db.queryExecute_Safe(stringAdicionaOuAltera, [ str(predictionDate), str(T), str(position), str(infoData), str(shortPercent), str(longPercent) ])
else: self.id = "error" # st.header('Company overview') # chart_data = pd.DataFrame(np.random.randn(20, 3),columns=['a', 'b', 'c']) # st.line_chart(chart_data) # chart_data = pd.DataFrame(np.random.randn(20, 3),columns=['a', 'b', 'c']) # st.line_chart(chart_data) # st.button("COMPRA") optionTicker = st.selectbox('Selecione o ticker', ('VALE3', 'INDFUT')) data_max = db.queryData( "SELECT max(data) as data FROM " + tabelaDePlotagem + " where ticker='" + optionTicker + "' ORDER BY DATA ASC", True) data_max = datetime.strptime(str(data_max["data"][0]), '%Y%m%d') data_min = db.queryData( "SELECT min(data) as data FROM " + tabelaDePlotagem + " where ticker='" + optionTicker + "' ORDER BY DATA ASC", True) data_min = datetime.strptime(str(data_min["data"][0]), '%Y%m%d') overall_win_query = db.queryData( "select data, (predicao==real) as validacao, pontos_real from " + tabelaDePlotagem + " where ticker='" + optionTicker + "' ORDER BY DATA ASC", True) overall_win_query["real_return"] = 0 overall_win_query['real_return'] = overall_win_query['real_return'].astype( 'float')