Esempio n. 1
0
def get_stock_data(start_date:str, end_date:str, stocks_tradable:List[str], tech_indicator_list:List[str]):
    """
    Download prices for ``stocks_tradable``, add technical indicators and expand
    the result onto a complete date x ticker grid.

    ``start_date`` and ``end_date`` are expected to span the train, validation
    and test periods together.

    Returns ``(processed_full, list_date)``:
      * ``processed_full`` - one row per (date, ticker) for every date present
        in the processed data, sorted by date then ticker, with missing values
        replaced by 0;
      * ``list_date`` - every daily date (as a string) between the first and the
        last processed date.
    """
    downloader = YahooDownloader(start_date=start_date,
                                 end_date=end_date,
                                 ticker_list=stocks_tradable)
    raw_prices = downloader.fetch_data()

    engineer = FeatureEngineer(use_technical_indicator=True,
                               tech_indicator_list=tech_indicator_list,
                               use_turbulence=False,
                               user_defined_feature=False)
    processed = engineer.preprocess_data(raw_prices)

    tickers = processed["tic"].unique().tolist()
    first_day = processed['date'].min()
    last_day = processed['date'].max()
    list_date = list(pd.date_range(first_day, last_day).astype(str))

    # Cartesian product of all days and tickers, then left-join the real rows onto it.
    grid = pd.DataFrame(list(itertools.product(list_date, tickers)),
                        columns=["date", "tic"])
    processed_full = grid.merge(processed, on=["date", "tic"], how="left")

    # Keep only the dates that actually occur in the processed data.
    on_known_date = processed_full['date'].isin(processed['date'])
    processed_full = processed_full[on_known_date].sort_values(['date', 'tic'])

    return processed_full.fillna(0), list_date
Esempio n. 2
0
def baseline_strat(ticker='^DJI'):
    """Download the fixed 2019-01-02 .. 2020-10-30 history of ``ticker`` and backtest it.

    Returns ``(prices, stats)``: the downloaded frame with an added
    ``daily_return`` column (one-period percentage change of ``close``) and
    whatever ``backtest_strat`` returns for that frame.
    """
    downloader = YahooDownloader(start_date="2019-01-02",
                                 end_date="2020-10-30",
                                 ticker_list=[ticker])
    prices = downloader.fetch_data()
    prices['daily_return'] = prices['close'].pct_change(1)
    return prices, backtest_strat(prices)
Esempio n. 3
0
 def setUp(cls):
     """Download the shared AAPL/GOOG fixture data and store it on ``cls``.

     Sets ``cls.ticker_list``, ``cls.df`` (the downloaded frame) and
     ``cls.indicators``.
     NOTE(review): the ``cls`` parameter suggests this is meant to run as a
     class-level fixture - confirm against the decorator outside this view.
     """
     cls.ticker_list = ["AAPL", "GOOG"]
     downloader = YahooDownloader(start_date="2009-01-01",
                                  end_date="2021-01-01",
                                  ticker_list=cls.ticker_list)
     cls.df = downloader.fetch_data()
     print(f"df columns: {cls.df.columns}")
     cls.indicators = ["open", "close", "high", "low", "volume"]
Esempio n. 4
0
def fetch_initial_numerical(trade_date, prev_days=30, time_fmt="%Y-%m-%d"):
    """Download price data for the ``prev_days`` days leading up to ``trade_date``.

    :param trade_date: (str) end of the window, formatted according to ``time_fmt``
    :param prev_days: (int) length of the look-back window in days
    :param time_fmt: (str) ``strptime``/``strftime`` format of the date strings
    :return: the frame returned by ``YahooDownloader.fetch_data`` for the
        module-level ``stock_tickers``
    :raises ValueError: if ``trade_date`` does not match ``time_fmt``
    """
    trade_day = datetime.datetime.strptime(trade_date, time_fmt)
    # Honour the caller-supplied window; the look-back used to be hard-coded
    # to 30 days, silently ignoring ``prev_days``.
    start_date = trade_day - datetime.timedelta(days=prev_days)
    numerical_df = YahooDownloader(start_date=start_date.strftime(time_fmt),
                                   end_date=trade_date,
                                   ticker_list=stock_tickers).fetch_data()
    return numerical_df
Esempio n. 5
0
def main():
    """Command-line entry point: prepare output folders, then run the chosen mode.

    Modes (``options.mode``):
      * ``"train"`` - run ``finrl.autotrain.training.train_one()``;
      * ``"download_data"`` - download ``config.DOW_30_TICKER`` between
        ``config.START_DATE`` and ``config.END_DATE`` and write it to a
        timestamped CSV inside ``config.DATA_SAVE_DIR``.
    """
    parser = build_parser()
    options = parser.parse_args()

    # exist_ok=True makes creation idempotent and avoids the check-then-create
    # race of a separate os.path.exists() test.
    for directory in (config.DATA_SAVE_DIR,
                      config.TRAINED_MODEL_DIR,
                      config.TENSORBOARD_LOG_DIR,
                      config.RESULTS_DIR):
        os.makedirs("./" + directory, exist_ok=True)

    if options.mode == "train":
        import finrl.autotrain.training

        finrl.autotrain.training.train_one()

    elif options.mode == "download_data":
        from finrl.marketdata.yahoodownloader import YahooDownloader

        df = YahooDownloader(start_date=config.START_DATE,
                             end_date=config.END_DATE,
                             ticker_list=config.DOW_30_TICKER).fetch_data()
        now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
        df.to_csv("./" + config.DATA_SAVE_DIR + "/" + now + ".csv")
Esempio n. 6
0
def baseline_strat(ticker, start, end):
    """Download ``ticker`` between ``start`` and ``end`` and backtest it.

    Returns ``(prices, stats)``: the downloaded frame with an added
    ``daily_return`` column (one-period percentage change of ``close``) and
    whatever ``backtest_strat`` returns for that frame.
    """
    downloader = YahooDownloader(start_date=start,
                                 end_date=end,
                                 ticker_list=[ticker])
    prices = downloader.fetch_data()
    prices['daily_return'] = prices['close'].pct_change(1)
    return prices, backtest_strat(prices)
Esempio n. 7
0
def main():
    """Demo of step-by-step ("online") prediction with an A2C model.

    Downloads 2020 prices for ``stock_tickers``, merges them with sentiment
    scores, splits the result at 2020-12-01 into a training part and a trading
    part, builds the environments and then feeds the trading rows to
    ``OnlineStockPrediction`` one index at a time, printing each prediction.

    The model is used as returned by ``get_model("a2c")``; no ``train_model``
    call is made here.
    """
    start_date = '2020-01-01'
    trade_start_date = '2020-12-01'
    end_date = '2021-01-01'

    downloader = YahooDownloader(start_date=start_date,
                                 end_date=end_date,
                                 ticker_list=stock_tickers)
    prices = downloader.fetch_data()
    sentiment = generate_sentiment_scores(start_date, end_date)
    merged = get_initial_data(prices, sentiment)
    train_data = data_split(merged, start_date, trade_start_date)
    trade_data = data_split(merged, trade_start_date, end_date)

    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    state_space = (1 + 2 * stock_dimension
                   + len(indicator_list) * stock_dimension)
    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=indicator_list,
        action_space=stock_dimension,
        reward_scaling=1e-4,
        print_verbosity=5,
    )

    train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = train_gym.get_sb_env()
    # The online environment is seeded with the rows at index 0 of the trade split.
    trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)
    model_a2c = DRLAgent(env=env_train).get_model("a2c")
    # Instantiated but not used further, exactly as in the original flow.
    FeatureEngineer()
    predictor = OnlineStockPrediction(trade_gym, model_a2c)

    # range() excludes its stop value, so the largest index is not fed.
    last_index = trade_data.index.unique().max()
    for step in range(1, last_index):
        print(trade_data.loc[step])
        predictor.add_data(trade_data.loc[step])
        action, states, next_obs, rewards = predictor.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)
Esempio n. 8
0
def fetch_and_store(start_date=config.START_DATE,
                    end_date=None,
                    interval=None,
                    ticker_list=config.CRYPTO_TICKER):
    """Download ``ticker_list`` and store the result as a timestamped CSV.

    :param start_date: first date to download
    :param end_date: last date to download; a falsy value means "today (UTC)"
    :param interval: accepted but not used by this function
    :param ticker_list: tickers to download
    :return: the downloaded frame, also written to
        ``./<config.DATA_SAVE_DIR>/<now>.csv`` where ``now`` is formatted with
        ``config.DATETIME_FMT``
    """
    print("==============Start Fetching Data===========")

    if not end_date:
        end_date = datetime.utcnow().strftime("%Y-%m-%d")
    downloader = YahooDownloader(start_date=start_date,
                                 end_date=end_date,
                                 ticker_list=ticker_list)
    df = downloader.fetch_data()

    timestamp = datetime.now().strftime(config.DATETIME_FMT)
    df.to_csv(f'./{config.DATA_SAVE_DIR}/{timestamp}.csv')
    return df
Esempio n. 9
0
def get_baseline(ticker, start, end):
    """Download baseline price data for ``ticker`` between ``start`` and ``end``.

    :param ticker: ticker symbol to download
    :param start: first date of the download
    :param end: last date of the download
    :return: the frame returned by ``YahooDownloader.fetch_data``
    :raises NotImplementedError: for the pseudo-ticker ``'crypto'``, for which
        no download is implemented
    """
    if ticker == 'crypto':
        print('*********')
        # No data source exists for this case. Previously execution fell through
        # to an unbound ``dji`` and crashed with UnboundLocalError.
        raise NotImplementedError(
            "baseline data for ticker 'crypto' is not implemented")

    dji = YahooDownloader(
        start_date=start, end_date=end, ticker_list=[ticker]
    ).fetch_data()

    print('here ' * 20)
    print(dji.head())

    return dji
Esempio n. 10
0
 def raw_data_download(raw_data_path, beg_date, end_date, ticker_list):
     """Return the raw price frame, using ``raw_data_path`` as a pickle cache.

     If the file exists it is read with ``pandas.read_pickle`` and its column
     names are printed. Otherwise the data for ``ticker_list`` between
     ``beg_date`` and ``end_date`` is downloaded and pickled to that path.
     """
     if not os.path.exists(raw_data_path):
         from finrl.marketdata.yahoodownloader import YahooDownloader
         downloader = YahooDownloader(
             start_date=beg_date,
             end_date=end_date,
             ticker_list=ticker_list,
         )
         downloaded = downloader.fetch_data()
         downloaded.to_pickle(raw_data_path)
         return downloaded

     import pandas as pd
     cached = pd.read_pickle(raw_data_path)
     print('| raw_df.columns.values:', cached.columns.values)
     return cached
Esempio n. 11
0
    def add_vix(self, data):
        """
        attach the ^VIX closing value from yahoo finance as a ``vix`` column
        :param data: (df) pandas dataframe with ``date`` and ``tic`` columns
        :return: (df) pandas dataframe joined with ``vix`` on ``date``,
                 sorted by date and ticker with a fresh index
        """
        frame = data.copy()
        downloader = YahooDownloader(start_date=frame.date.min(),
                                     end_date=frame.date.max(),
                                     ticker_list=["^VIX"])
        vix_prices = downloader.fetch_data()
        vix = vix_prices[['date', 'close']].rename(columns={'close': 'vix'})

        # default (inner) merge: rows without a matching VIX date are dropped
        merged = frame.merge(vix, on="date")
        return merged.sort_values(["date", "tic"]).reset_index(drop=True)
Esempio n. 12
0
def train_one():
    """
    Fetch DOW 30 data, train an A2C agent on the training split, trade on the
    trading split and write the account values and backtest statistics to
    ``config.RESULTS_DIR``.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(start_date=config.START_DATE,
                                 end_date=config.END_DATE,
                                 ticker_list=config.DOW_30_TICKER)
    raw = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(raw,
                               feature_number=5,
                               use_technical_indicator=True,
                               use_turbulence=True)
    df = engineer.preprocess_data()

    # Split at START_TRADE_DATE into a training part and a trading part.
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    env_setup = EnvSetup(stock_dim=len(train.tic.unique()))
    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = dict(
        n_steps=5,
        ent_coef=0.005,
        learning_rate=0.0007,
        verbose=0,
        timesteps=100000,
    )
    model_a2c = agent.train_A2C(model_name="A2C_{}".format(now),
                                model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)
    df_account_value = DRLAgent.DRL_prediction(model=model_a2c,
                                               test_data=trade,
                                               test_env=env_trade,
                                               test_obs=obs_trade)

    results_prefix = "./" + config.RESULTS_DIR + "/"
    df_account_value.to_csv(results_prefix + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = pd.DataFrame(BackTestStats(df_account_value))
    perf_stats_all.to_csv(results_prefix + "perf_stats_all_" + now + '.csv')
Esempio n. 13
0
def main():
    """Download the DOW 30 history configured in ``config`` and save it as CSV.

    The output file is ``./<config.DATA_SAVE_DIR>/<options.name>.csv``; an
    existing file of that name is removed before the new one is written.
    """
    parser = build_parser()
    options = parser.parse_args()

    # Make sure the output folder exists.
    save_dir = "./" + config.DATA_SAVE_DIR
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Download parameters come from config.py.
    first_day = config.START_DATE
    last_day = config.END_DATE
    tickers = config.DOW_30_TICKER

    print("All stocks used for training:", tickers)
    print("Historical data are used from: ", first_day)
    print("Till end date: ", last_day)

    downloader = YahooDownloader(start_date=first_day,
                                 end_date=last_day,
                                 ticker_list=tickers)
    data_frame = downloader.fetch_data()

    print("Data Frame shape is: ", data_frame.shape)
    print("Data Frame format is following: \n\n", data_frame.head())

    # Replace any previous download of the same name.
    csv_path = "./" + config.DATA_SAVE_DIR + "/" + options.name + ".csv"
    if not os.path.exists(csv_path):
        print("The download data file does not exist")
    else:
        os.remove(csv_path)
        print("The download data file deleted")

    data_frame.to_csv(csv_path)

    print(
        "Successfuly completed the task of downloading and saving financial data."
    )
Esempio n. 14
0
def test_process_data():
    """Feed ``DataProcessor`` one date at a time and print each processed frame.

    Data up to 2020-12-01 initialises the processor; the price and sentiment
    rows dated after 2020-12-01 are then pushed through ``process_data`` per
    distinct date.
    """
    start_date = '2020-11-01'
    end_date = '2021-01-01'
    cutoff = '2020-12-01'

    downloader = YahooDownloader(start_date=start_date,
                                 end_date=end_date,
                                 ticker_list=stock_tickers)
    numerical_df = downloader.fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    trade_data = data_split(initial_data, start_date, cutoff)

    numerical_feed = numerical_df[numerical_df.date > cutoff]
    sentiment_feed = sentiment_df[sentiment_df.date > cutoff]
    processor = DataProcessor(FeatureEngineer(), trade_data)

    for day in numerical_feed.date.unique():
        day_prices = numerical_feed[numerical_feed.date == day]
        day_sentiment = sentiment_feed.loc[sentiment_feed.date == day]
        print(processor.process_data(day_prices, day_sentiment))
Esempio n. 15
0
def load_stock_trading_data():
    """Return ``(train_df, eval_df)`` for the stock trading environment.

    Raw and processed frames are cached as pickles under ``./env/FinRL``; each
    is downloaded or computed only when its cache file is missing. The
    processed frame is split into 2008-03-19 .. 2016-01-01 (training) and
    2016-01-01 .. 2021-01-01 (evaluation) with ``data_split``.
    """
    from finrl.config import config

    cwd = './env/FinRL'
    raw_data_path = cwd + '/StockTradingEnv_raw_data.df'
    processed_data_path = cwd + '/StockTradingEnv_processed_data.df'
    os.makedirs(cwd, exist_ok=True)

    print("==============Start Fetching Data===========")
    if not os.path.exists(raw_data_path):
        from finrl.marketdata.yahoodownloader import YahooDownloader
        downloader = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.DOW_30_TICKER,
        )
        raw_df = downloader.fetch_data()
        raw_df.to_pickle(raw_data_path)
    else:
        raw_df = pd.read_pickle(raw_data_path)
        print('| raw_df.columns.values:', raw_df.columns.values)

    print("==============Start Feature Engineering===========")
    if not os.path.exists(processed_data_path):
        from finrl.preprocessing.preprocessors import FeatureEngineer
        engineer = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=True,
            user_defined_feature=False,
        )
        processed_df = engineer.preprocess_data(raw_df)
        processed_df.to_pickle(processed_data_path)
    else:
        processed_df = pd.read_pickle(processed_data_path)
        print('| processed_df.columns.values:', processed_df.columns.values)

    # Training / evaluation split on fixed dates.
    from finrl.preprocessing.data import data_split
    train_df = data_split(processed_df, '2008-03-19', '2016-01-01')
    eval_df = data_split(processed_df, '2016-01-01', '2021-01-01')
    return train_df, eval_df
Esempio n. 16
0
def get_yahoo_data(start, end):
    """Download DOW 30 data, keep only tickers present on every date, add features.

    :param start: first date of the download
    :param end: last date of the download
    :return: the frame produced by ``FeatureEngineer.preprocess_data``, sorted
        by ``date`` and ``tic`` with a fresh 0..n-1 index
    """
    df = YahooDownloader(start_date=start,
                         end_date=end,
                         ticker_list=config.DOW_30_TICKER).fetch_data()

    # sort_values returns a new frame; the result used to be discarded, which
    # made this call a no-op.
    df = df.sort_values(['date', 'tic'], ignore_index=True)

    # Intersection of the tickers quoted on each date. ``None`` marks "no date
    # seen yet" so that a genuinely empty intersection is not mistaken for the
    # initial state (the old ``len(templ) == 0`` check reset it).
    common = None
    for _, group in df.groupby('date'):
        present = set(group.tic.unique())
        common = present if common is None else common & present

    temp_df = pd.DataFrame(
        {'tic': pd.Series(sorted(common or ()), dtype=object)})

    frames = []
    for name, group in df.groupby('date'):
        # Inner merge keeps only the common tickers for this date.
        result_outer = pd.merge(group, temp_df.assign(date=name),
                                on=['date', 'tic'])
        result_outer = result_outer.sort_values(['date', 'tic'],
                                                ignore_index=True)

        # Internal invariant: every common ticker appears exactly once per date.
        assert len(result_outer) == len(temp_df)
        frames.append(result_outer)

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame on every iteration.
    if frames:
        df = pd.concat(frames)
    else:
        df = pd.DataFrame(columns=list(df.columns))

    fe = FeatureEngineer(use_technical_indicator=True,
                         tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
                         use_turbulence=True,
                         user_defined_feature=False)

    processed = fe.preprocess_data(df)
    # Same fix as above: keep the sorted result instead of discarding it.
    processed = processed.sort_values(['date', 'tic'], ignore_index=True)

    return processed
Esempio n. 17
0
def prepare_data() -> (dict, pd.DataFrame):
    """Return ``(env_kwargs, processed)`` for building trading environments.

    ``processed`` comes from ``load_from_cache()``; when that returns ``None``
    the data for ``config.CURRENT_TICKER`` is downloaded, run through
    ``FeatureEngineer`` (turbulence disabled) and stored with ``save``.
    ``env_kwargs`` holds the environment settings derived from the number of
    distinct tickers in ``processed``.
    """
    processed = load_from_cache()
    if processed is None:
        print("==============Start Fetching Data===========")
        downloader = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.CURRENT_TICKER,
        )
        raw = downloader.fetch_data()

        print("==============Start Feature Engineering===========")
        engineer = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=False,
            user_defined_feature=False,
        )
        processed = engineer.preprocess_data(raw)
        save(processed)

    # Sizes of the state and action spaces.
    stock_dimension = len(processed.tic.unique())
    indicator_count = len(config.TECHNICAL_INDICATORS_LIST)
    state_space = (2 * stock_dimension +
                   indicator_count * stock_dimension * 2)

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.01,
        sell_cost_pct=0.01,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        action_space=stock_dimension,
        reward_scaling=1e-4,
    )
    return env_kwargs, processed
Esempio n. 18
0
# END_DATE comes from config.py (per the original note it is a string).
# As a bare expression this only displays the value in a notebook cell; it has
# no effect when the file is run as a script.
config.END_DATE


# In[7]:


# Show the ticker list that is downloaded below.
print(config.DOW_30_TICKER)


# In[8]:


# Download the DOW 30 tickers for the configured date range into ``df``.
df = YahooDownloader(start_date = config.START_DATE,
                     end_date = config.END_DATE,
                     ticker_list = config.DOW_30_TICKER).fetch_data()


# # Part 4: Preprocess Data
# In[9]:

# Add the configured technical indicators and the turbulence feature;
# no user-defined features are requested.
fe = FeatureEngineer(
                    use_technical_indicator=True,
                    tech_indicator_list = config.TECHNICAL_INDICATORS_LIST,
                    use_turbulence=True,
                    user_defined_feature = False)
processed = fe.preprocess_data(df)

# %% Show turbulence
# If this cell errors, open the VSCode Settings (JSON) and change ...
# NOTE(review): the original instruction is cut off here - restore the missing setting name.
Esempio n. 19
0
def get_baseline(ticker, start, end):
    """Return the Yahoo download for the single ``ticker`` between ``start`` and ``end``."""
    downloader = YahooDownloader(start_date=start,
                                 end_date=end,
                                 ticker_list=[ticker])
    return downloader.fetch_data()
Esempio n. 20
0
    def test_download(self):
        """A one-month download for ``self.ticker_list`` must yield a DataFrame."""
        downloader = YahooDownloader(start_date="2019-01-01",
                                     end_date="2019-02-01",
                                     ticker_list=self.ticker_list)
        result = downloader.fetch_data()

        self.assertIsInstance(result, pd.DataFrame)
Esempio n. 21
0
def train_one():
    """
    Fetch S&P 500 data, train an A2C agent on the training split, trade on the
    trading split and write account values, actions and backtest statistics to
    ``config.RESULTS_DIR``.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(start_date=config.START_DATE,
                                 end_date=config.END_DATE,
                                 ticker_list=config.SP_500_TICKER)
    raw = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(raw,
                               use_technical_indicator=True,
                               use_turbulence=True)
    df = engineer.preprocess_data()

    # Split at START_TRADE_DATE into a training part and a trading part.
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    # Sizes of the state and action spaces.
    stock_dimension = len(train.tic.unique())
    indicator_count = len(config.TECHNICAL_INDICATORS_LIST)
    state_space = (1 + 2 * stock_dimension
                   + indicator_count * stock_dimension)

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=3000,
                         transaction_cost_pct=0.001)
    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = dict(
        n_steps=5,
        ent_coef=0.005,
        learning_rate=0.0007,
        verbose=0,
        timesteps=80000,
    )
    model = agent.train_A2C(model_name="A2C_{}".format(now),
                            model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=model,
        test_data=trade,
        test_env=env_trade,
        test_obs=obs_trade)

    results_prefix = "./" + config.RESULTS_DIR + "/"
    df_account_value.to_csv(results_prefix + "df_account_value_" + now + '.csv')
    df_actions.to_csv(results_prefix + "df_actions_" + now + '.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = pd.DataFrame(BackTestStats(df_account_value))
    perf_stats_all.to_csv(results_prefix + "perf_stats_all_" + now + '.csv')
Esempio n. 22
0
    trained_model = agent.get_model(model,
                            model_kwargs = model_params,
                            verbose = 0).load(model_paths)



# Evaluate the loaded model: average_predict is asked for 3 evaluations
# (n_evals = 3) on the test environment.
print('Testing...')
df_account_value, df_actions = DRLAgent.average_predict(
    model=trained_model,
    environment = test_gym_env,n_evals = 3)



# Download the Dow Jones index over the same test window as a benchmark.
print('Comparing to DJI')
dji = YahooDownloader(
            start_date=split_date, end_date=end_date, ticker_list=['^DJI']
        ).fetch_data()
# Convert both date columns to matplotlib's numeric date format for plotting.
dates_rl = matplotlib.dates.date2num(df_account_value['date'])
dates_base = matplotlib.dates.date2num(dji['date'])


# Number of index "shares" that 1,000,000 buys at the first close, so the
# benchmark curve starts from that amount.
# NOTE(review): ``[0]`` is a label lookup on the Series; this assumes the
# downloaded frame has a 0-based index - confirm.
init_dji_shares = 1000000/dji['close'][0]


# Plot the RL account value against the scaled index and save the figure.
plt.plot_date(dates_rl,df_account_value['account_value'],'-')
plt.plot_date(dates_base,dji['close'] * init_dji_shares,'-')
plt.legend(['RL','DJI'])
plt.title(f'{model} model trained from {start_date}-{split_date}')
plt.ylabel('Account Value')
plt.savefig(f'imgs/{model}_vs_dji_{split_date}_{end_date}.png')
Esempio n. 23
0
def train_one():
    """
    Train (or load) a SAC agent for FXAIX, trade with it and save the results.

    Steps: download FXAIX, add the configured technical indicators and
    turbulence, split at ``config.START_TRADE_DATE``, then either train a new
    SAC model (when the user answers ``1`` at the prompt) or load a saved one.
    The account values, actions and backtest statistics are written as CSV
    files and a two-panel plot (account value coloured by action, closing
    price) is saved under ``../<config.RESULTS_DIR>``.
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    # The returned values are not used below; the call is kept as in the
    # original flow.
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    user_input = input('train model? 1 train 0 don\'t train')
    # input() always returns a string, so the former comparison with the
    # integer 1 was never true and the training branch could not be reached.
    if user_input.strip() == "1":
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + df.tic[0] + "_frl")
    else:
        trained_sac = SAC.load('../models/sac_80k_msft_working')
    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv("../" + config.RESULTS_DIR +
                            "/SAC_df_account_value_" + df.tic[0] + "_" + now +
                            ".csv")
    df_actions.to_csv("../" + config.RESULTS_DIR + "/SAC_df_actions_" +
                      df.tic[0] + "_" + now + ".csv")

    # Backtest statistics
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("../" + config.RESULTS_DIR + "/SAC_perf_stats_all_" +
                          df.tic[0] + "_" + now + ".csv")

    # plot acc value
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']

    # Turn the curve into consecutive (start, end) segments so that each one
    # can be coloured individually.
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)

    # Three colour bands selected by the action value:
    # below -0.1, between -0.1 and 0.1, above 0.1.
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    axs[0].add_collection(lc)

    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE +
                      " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (10000 of USD)')
    axs[0].set_title("Trading Test on " + df.tic[0])

    # add_collection does not autoscale, so set the limits explicitly.
    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())

    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4)
    ]

    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    # plot stock value (pyplot calls draw on the current axes)
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(df.tic[0] + " Closing Price")
    plt.plot(tx, ty)

    plt.savefig("../" + config.RESULTS_DIR + "/plots/"
                "SAC_plot_" + df.tic[0] + "_" + now + ".png")
Esempio n. 24
0
def _download_and_process(stock_tickers, data_path):
    """Download 2000-01-01 .. 2021-01-01 prices for ``stock_tickers``, add indicators, write CSV."""
    print('Getting Data: ')
    df = YahooDownloader(start_date = '2000-01-01',
                         end_date = '2021-01-01',
                         ticker_list = stock_tickers).fetch_data()

    fe = FeatureEngineer(
                    use_technical_indicator=True,
                    tech_indicator_list = config.TECHNICAL_INDICATORS_LIST,
                    use_turbulence=True,
                    user_defined_feature = False)

    print('Adding Indicators')
    processed = fe.preprocess_data(df)

    # NOTE(review): the original also built a zero-filled date x ticker grid
    # ("processed_full") here but never used it - the un-filled ``processed``
    # frame is what gets written. The dead computation was dropped; confirm
    # which frame is meant to be cached.
    processed.to_csv(data_path,index = False)


def get_dataset(datadir,data_type,start_date,end_date):
    """Load the ``data_type`` dataset from ``datadir`` and cut it to a date range.

    When ``<datadir>/<data_type>.csv`` is missing, the ``dow29`` and ``nas29``
    datasets are downloaded from Yahoo Finance, processed and cached there
    first; every other type must already be present.

    :param datadir: directory holding the cached CSV files
    :param data_type: dataset name; must be in ``config.SUPPORTED_DATA``
    :param start_date: first date of the returned range
    :param end_date: last date of the returned range
    :return: the result of ``data_split`` over the loaded frame
    :raises ValueError: if ``data_type`` is unsupported, or its CSV is missing
        and cannot be downloaded
    """
    if data_type not in config.SUPPORTED_DATA:
        raise ValueError('Market type not supported')

    data_path = os.path.join(datadir,data_type + '.csv')

    if not os.path.exists(data_path):
        # If we don't have the data, dow/nasdaq data can be downloaded from
        # yahoo finance; the other datasets have to be supplied by hand.
        if data_type == 'dow29':
            _download_and_process(config.DOW_30_TICKER_MINUS_VISA, data_path)
        elif data_type == 'nas29':
            _download_and_process(config.NAS_29_TICKER, data_path)
        elif data_type == 'dow290':
            raise ValueError('Need to add dow29 with zeros crossings to data directory')
        elif data_type == 'dow29w0':
            raise ValueError('Need to add dow29 with OUT zeros crossings to data directory')
        else:
            raise ValueError('Need to add crypto data to data directory')

    # Load and subset data
    full_df = pd.read_csv(data_path)

    max_date = max(full_df['date'])
    min_date = min(full_df['date'])

    # Only warn when the request reaches outside the available range; the
    # split below is still performed with the requested dates.
    if min_date > start_date:
        warnings.warn('Earliest possible start date is {}: You have chosen {}. The later date will be used'.format(min_date,start_date))
    if max_date < end_date:
        warnings.warn('Latest possible end date is {}: You have chosen {}. The earlier date will be used'.format(max_date,end_date))

    return data_split(full_df,start_date,end_date)
Esempio n. 25
0
                        metavar='<end_date>',
                        help='training data end date')
    parser.add_argument('-t',
                        '--trade-date',
                        metavar='<trade_date>',
                        help='trading start date')
    args = parser.parse_args()
    consumer = KafkaConsumer(args.topic, auto_offset_reset='latest', \
            bootstrap_servers=args.hosts, api_version=(0, 10), consumer_timeout_ms=1000)

    producer = KafkaProducer(bootstrap_servers=args.hosts, api_version=(0, 10))
    # data initialization
    tday = datetime.date.today()
    yday = tday - datetime.timedelta(days=1)
    fmt = "%Y-%m-%d"
    numerical_df = YahooDownloader(args.start_date, args.end_date,
                                   config.stock_tickers).fetch_data()
    sentiment_df = generate_sentiment_scores(args.start_date, args.end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    data_processor = DataProcessor(FeatureEngineer(), initial_data)

    new_numerical = YahooDownloader(datetime.datetime.strftime(yday, fmt),
                                    datetime.datetime.strftime(tday, fmt),
                                    config.stock_tickers).fetch_data()
    new_sentiment = generate_sentiment_scores(
        datetime.datetime.strftime(yday, fmt),
        datetime.datetime.strftime(yday, fmt))
    # set up model to train on initial data
    load_path = "./trained_models/a2c_2019-2020_80k.zip"
    model = setup_model(initial_data)

    while consumer is None:
# In[7]:


# END_DATE comes from config.py (per the original note it is a string).
# As a bare expression this only displays the value in a notebook cell.
config.END_DATE


# ticker_list is a list of stock tickers; in the single-stock trading case the list contains only 1 ticker.

# In[8]:


# Download AAPL for 2009-01-01 .. 2021-01-01 into a pandas DataFrame.
# (Nothing is written to disk in this cell.)
data_df = YahooDownloader(start_date = '2009-01-01',
                          end_date = '2021-01-01',
                          ticker_list = ['AAPL']).fetch_data()


# In[9]:


# Notebook display of the frame's (rows, columns).
data_df.shape


# In[10]:


# Notebook display of the first rows.
data_df.head()

Esempio n. 27
0
def train_one():
    """
    Run a single end-to-end experiment with the agent's "sac" model.

    The pipeline downloads price data for ``config.DOW_30_TICKER``, runs the
    feature engineer (technical indicators plus turbulence), splits the
    result into a training window and a trading window at
    ``config.START_TRADE_DATE``, trains for 80000 timesteps, trades on the
    trading window and finally writes three CSV files (account values,
    actions, backtest statistics) under ``config.RESULTS_DIR``.

    Takes no arguments and returns nothing; all results are written to disk
    and progress banners are printed to stdout.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    )
    raw_prices = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    featured = engineer.preprocess_data(raw_prices)

    # Both windows share START_TRADE_DATE as their boundary.
    training_frame = data_split(featured, config.START_DATE,
                                config.START_TRADE_DATE)
    trading_frame = data_split(featured, config.START_TRADE_DATE,
                               config.END_DATE)

    # State size: 1 + 2 entries per stock + one entry per (indicator, stock).
    n_stocks = len(training_frame.tic.unique())
    n_indicators = len(config.TECHNICAL_INDICATORS_LIST)
    state_space = 1 + 2 * n_stocks + n_indicators * n_stocks

    # Settings shared by the training and the trading environment.
    shared_env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        state_space=state_space,
        stock_dim=n_stocks,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        action_space=n_stocks,
        reward_scaling=1e-4,
    )

    training_gym = StockTradingEnv(df=training_frame, **shared_env_kwargs)
    # Only the trading environment gets a turbulence threshold.
    trading_gym = StockTradingEnv(df=trading_frame,
                                  turbulence_threshold=250,
                                  **shared_env_kwargs)
    env_train, _ = training_gym.get_sb_env()
    env_trade, obs_trade = trading_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    # Timestamp taken before training starts; it tags every output file.
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    untrained_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=untrained_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    prediction = DRLAgent.DRL_prediction(model=trained_sac,
                                         test_data=trading_frame,
                                         test_env=env_trade,
                                         test_obs=obs_trade)
    df_account_value, df_actions = prediction

    results_root = "./" + config.RESULTS_DIR
    account_value_csv = results_root + "/df_account_value_" + run_stamp + ".csv"
    actions_csv = results_root + "/df_actions_" + run_stamp + ".csv"
    df_account_value.to_csv(account_value_csv)
    df_actions.to_csv(actions_csv)

    print("==============Get Backtest Results===========")
    perf_stats_csv = results_root + "/perf_stats_all_" + run_stamp + ".csv"
    perf_stats_frame = pd.DataFrame(BackTestStats(df_account_value))
    perf_stats_frame.to_csv(perf_stats_csv)
Esempio n. 28
0
    df_account_value, df_actions = DRLAgent.average_predict(
        model=trained_model,
        environment = test_gym_env,n_evals = 10)

    df_account_value.to_csv('results/' + model +'account_values_10avg.csv')
    df_actions.to_csv('results/' + model +'actions_10avg.csv')
    dates_rl = matplotlib.dates.date2num(df_account_value['date'])

    plt.plot_date(dates_rl,df_account_value['account_value'],'-')


# Compare the plotted model results against a buy-and-hold position in a
# market index over the test window [split_date, end_date].
end_date = '2018-11-28' # Model is tested from split_date to end_date
split_date = '2016-10-01'
print('Comparing to Index')
# NOTE: the variable is called `dji`, but the ticker requested here is the
# NASDAQ composite ('^IXIC'); switch to '^DJI' for the Dow.
dji = YahooDownloader(
            start_date=split_date, end_date=end_date, ticker_list=['^IXIC'] # Dow = '^DJI', NASDAQ = '^IXIC'
        ).fetch_data()
model_names.append('Nas.')
dates_rl = matplotlib.dates.date2num(df_account_value['date'])
dates_base = matplotlib.dates.date2num(dji['date'])


# Number of index "shares" bought with 1000000 at the first close of the
# window. Use positional .iloc[0] rather than the label lookup [0], which
# only works when the index happens to contain the label 0.
init_dji_shares = 1000000/dji['close'].iloc[0]

# Value of that fixed position over time, drawn as the baseline curve.
plt.plot_date(dates_base,dji['close'] * init_dji_shares,'-')
plt.legend(model_names)
plt.xticks(rotation=30)
plt.title(f'Models Trained from {start_date}-{split_date}')
plt.ylabel('Account Value')
plt.savefig(f'imgs/models_vs_NASDAQ_{split_date}_{end_date}.png')
Esempio n. 29
0
def main():
    """
    Run the ensemble-strategy experiment on ``config.PENNY_STOCKS``.

    Steps performed, in order:

    1. Create the data, trained-model, tensorboard and results directories.
    2. Download price data and run the feature engineer on it.
    3. Build a ``DRLEnsembleAgent`` and run the ensemble strategy with the
       A2C / PPO / DDPG / TD3 hyper-parameters defined below.
    4. Stitch the per-window ``results/account_value_trade_ensemble_<i>.csv``
       files together, print the annualised Sharpe ratio, plot the account
       value and run the backtest statistics / baseline comparison.

    Raises:
        RuntimeError: if none of the per-window account-value CSV files
            could be found, so there is nothing to evaluate.
    """
    # exist_ok=True replaces the check-then-create pattern, which can fail
    # if the directory appears between the check and the creation.
    for directory in (config.DATA_SAVE_DIR, config.TRAINED_MODEL_DIR,
                      config.TENSORBOARD_LOG_DIR, config.RESULTS_DIR):
        os.makedirs("./" + directory, exist_ok=True)

    print(config.START_DATE)
    print(config.END_DATE)
    print(config.PENNY_STOCKS)

    df = YahooDownloader(start_date=config.START_DATE,
                         end_date=config.END_DATE,
                         ticker_list=config.PENNY_STOCKS).fetch_data()

    fe = FeatureEngineer(use_technical_indicator=True,
                         tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
                         use_turbulence=True,
                         user_defined_feature=False)

    processed = fe.preprocess_data(df)
    # Every processed column except the two identifier columns is fed to the
    # environment as an "indicator".
    information_cols = list(processed)
    information_cols.remove('date')
    information_cols.remove('tic')

    stock_dimension = len(processed.tic.unique())
    state_space = 1 + 2 * stock_dimension + len(
        information_cols) * stock_dimension
    print("Stock Dimension: {}, State Space: {}".format(
        stock_dimension, state_space))

    env_kwargs = {
        "hmax": 100,
        # In Indonesia the minimum number of shares per transaction is 100,
        # so the initial amount is scaled down by dividing it by 100.
        "initial_amount": 5000,
        # Transaction costs are disabled here; for reference, IPOT charges
        # 0.19% on buys and 0.29% on sells.
        "buy_cost_pct": 0.00,
        "sell_cost_pct": 0.00,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": information_cols,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
        "print_verbosity": 5
    }

    # rebalance_window: number of days between model retrainings.
    rebalance_window = 63
    # validation_window: number of days used for validation and for trading
    # (with 63, both the validation and the trading period are 63 days).
    validation_window = 63
    train_start = config.START_DATE
    train_end = config.START_TRADE_DATE
    val_test_start = config.START_TRADE_DATE
    val_test_end = config.END_DATE

    ensemble_agent = DRLEnsembleAgent(df=processed,
                                      train_period=(train_start, train_end),
                                      val_test_period=(val_test_start,
                                                       val_test_end),
                                      rebalance_window=rebalance_window,
                                      validation_window=validation_window,
                                      **env_kwargs)

    A2C_model_kwargs = {
        'n_steps': 5,
        'ent_coef': 0.01,
        'learning_rate': 0.0005
    }

    PPO_model_kwargs = {
        "ent_coef": 0.01,
        "n_steps": 2048,
        "learning_rate": 0.00025,
        "batch_size": 128
    }

    DDPG_model_kwargs = {
        "action_noise": "ornstein_uhlenbeck",
        "buffer_size": 50000,
        "learning_rate": 0.000005,
        "batch_size": 128
    }

    TD3_model_kwargs = {
        "batch_size": 100,
        "buffer_size": 1000000,
        "learning_rate": 0.001
    }

    timesteps_dict = {'a2c': 4000, 'ppo': 4000, 'ddpg': 4000, 'td3': 4000}

    df_summary = ensemble_agent.run_ensemble_strategy(A2C_model_kwargs,
                                                      PPO_model_kwargs,
                                                      DDPG_model_kwargs,
                                                      TD3_model_kwargs,
                                                      timesteps_dict)

    print(df_summary)

    unique_trade_date = processed[(processed.date > val_test_start) & (
        processed.date <= val_test_end)].date.unique()

    df_trade_date = pd.DataFrame({'datadate': unique_trade_date})

    # Collect the per-window result files and concatenate them once.
    # DataFrame.append was removed in pandas 2.0, and together with the old
    # bare ``except`` its failure was silently swallowed, leaving an empty
    # frame behind.
    account_value_frames = []
    for i in range(rebalance_window + validation_window,
                   len(unique_trade_date) + 1, rebalance_window):
        # Diagnostic output of the loop bounds.
        print(rebalance_window + validation_window)
        print(len(unique_trade_date) + 1)
        print(rebalance_window)
        try:
            temp = pd.read_csv('results/account_value_trade_{}_{}.csv'.format(
                'ensemble', i))
        except FileNotFoundError:
            # The first missing window file marks the end of the results.
            break
        account_value_frames.append(temp)

    if not account_value_frames:
        raise RuntimeError(
            "No 'results/account_value_trade_ensemble_<i>.csv' files were "
            "found; cannot compute backtest statistics.")
    df_account_value = pd.concat(account_value_frames, ignore_index=True)

    # Annualised Sharpe ratio of the daily account-value returns, assuming
    # 252 trading days per year.
    daily_returns = df_account_value.account_value.pct_change(1)
    sharpe = (252**0.5) * daily_returns.mean() / daily_returns.std()
    print('Sharpe Ratio: ', sharpe)
    # Attach trade dates, skipping the first validation_window of them.
    df_account_value = df_account_value.join(
        df_trade_date[validation_window:].reset_index(drop=True))

    df_account_value.account_value.plot()

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(account_value=df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)

    # NOTE(review): the banner mentions IHSG, but the baseline ticker passed
    # below is '^DJI' -- confirm which benchmark is intended.
    print("==============Compare to IHSG===========")
    backtest_plot(df_account_value,
                  baseline_ticker='^DJI',
                  baseline_start=df_account_value.loc[0, 'date'],
                  baseline_end=df_account_value.loc[len(df_account_value) - 1,
                                                    'date'])
    os.makedirs("./" + config.TRAINED_MODEL_DIR)
# Make sure the tensorboard-log and results directories exist before any
# output is written to them.
if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
    os.makedirs("./" + config.TENSORBOARD_LOG_DIR)
if not os.path.exists("./" + config.RESULTS_DIR):
    os.makedirs("./" + config.RESULTS_DIR)

# Notebook-style inspection of the configured date range; as bare expression
# statements these have no visible effect when run as a plain script.
config.START_DATE

config.END_DATE

print(config.DOW_30_TICKER)

############# DOWNLOAD DATA #############

# NOTE: the download window is hard-coded here instead of using
# config.START_DATE / config.END_DATE inspected above.
df = YahooDownloader(start_date='2008-01-01',
                     end_date='2021-01-01',
                     ticker_list=config.DOW_30_TICKER).fetch_data()

# Inspection only: the results of .shape and of the sorted .head() are
# discarded, and sort_values() without inplace does not reorder `df` itself.
df.shape
df.sort_values(['date', 'tic'], ignore_index=True).head()
df.info()

############# PERFORM FEATURE ENGINEERING #############

# Technical indicators and turbulence are enabled; user-defined features are
# disabled.
fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
                     use_turbulence=True,
                     user_defined_feature=False)

processed = fe.preprocess_data(df)