/
update_data.py
69 lines (60 loc) · 2.68 KB
/
update_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Move the Update Data Logic Here
import simfin as sf
from load import load_dataset, load_shareprices
import pathlib
import os
from dotenv import load_dotenv
from predict import train, predict, predict_similiar
# Let python-dotenv populate the process environment first, so the lookup
# below can see values that come from a .env file.
load_dotenv()

# API key handed to every SimFin loader in this script; falls back to the
# literal 'free' when the SIMFIN_API_KEY variable is not set.
SIMFIN_API_KEY = os.environ.get('SIMFIN_API_KEY', 'free')

# Relative directories for data and models.
# NOTE(review): neither constant is referenced again in the visible part of
# this script - confirm whether other modules import them.
DATA_DIR = pathlib.Path('./data')
MODELS_DIR = pathlib.Path('./models')
# LOAD
# Share prices are loaded once and passed to every dataset loader below.
shareprices_df = load_shareprices(simfin_api_key=SIMFIN_API_KEY)

# One load_dataset() call per dataset name, made in the order listed; the
# three results are unpacked into one module-level name per dataset.
general_df, banks_df, insurance_df = (
    load_dataset(
        dataset=dataset_name,
        simfin_api_key=SIMFIN_API_KEY,
        shareprices_df=shareprices_df,
    )
    for dataset_name in ('general', 'banks', 'insurance')
)
# TRAIN
# One model per dataset. All three share learning_rate=0.01 and both
# colsample_* values of 0.7; they differ in winsor_quantile, max_depth,
# subsample and (insurance only) n_estimators.
# NOTE(review): how train() interprets `param` and the *_name arguments is
# defined in predict.py, not here - confirm there before changing keys.
general_model = train(
    general_df,
    winsor_quantile=0.01,
    model_name='general_model',
    feature_name='general',
    param={
        'learning_rate': 0.01,
        'max_depth': 3,
        'subsample': 0.5,
        'colsample_bylevel': 0.7,
        'colsample_bytree': 0.7,
        'n_estimators': 200,
    },
)

banks_model = train(
    banks_df,
    winsor_quantile=0.05,
    model_name='banks_model',
    feature_name='banks',
    param={
        'learning_rate': 0.01,
        'max_depth': 2,
        'subsample': 0.8,
        'colsample_bylevel': 0.7,
        'colsample_bytree': 0.7,
        'n_estimators': 200,
    },
)

insurance_model = train(
    insurance_df,
    winsor_quantile=0.08,
    model_name='insurance_model',
    feature_name='insurance',
    param={
        'learning_rate': 0.01,
        'max_depth': 2,
        'subsample': 1,
        'colsample_bylevel': 0.7,
        'colsample_bytree': 0.7,
        'n_estimators': 150,
    },
)
# PREDICT
# Each *_df name is rebound to whatever predict() returns for its own
# model/dataset pair. The right-hand tuple is evaluated left to right, so
# the calls run in general -> banks -> insurance order.
# NOTE(review): the third argument looks like an output label - confirm its
# meaning in predict.py.
general_df, banks_df, insurance_df = (
    predict(general_model, general_df, 'general_predictions'),
    predict(banks_model, banks_df, 'banks_predictions'),
    predict(insurance_model, insurance_df, 'insurance_predictions'),
)
# PREDICT SIMILAR STOCKS
# Runs predict_similiar() once per dataset, using the *_df values as they
# stand at this point in the script (i.e. after the PREDICT step rebinds
# them). The function name keeps the spelling it is imported under.
general_matrix_df = predict_similiar(
    general_model,
    general_df,
    'general_sim_matrix',
)
banks_matrix_df = predict_similiar(
    banks_model,
    banks_df,
    'banks_sim_matrix',
)
insurance_matrix_df = predict_similiar(
    insurance_model,
    insurance_df,
    'insurance_sim_matrix',
)