예제 #1
0
def get_stock_data(start_date:str, end_date:str, stocks_tradable:List[str], tech_indicator_list:List[str]):
    """
    Download prices for ``stocks_tradable`` and attach technical indicators.

    ``start_date`` and ``end_date`` cover the whole period, i.e. the train,
    validation and test time periods together.

    Returns a tuple ``(processed_full, list_date)``:

    * ``processed_full`` -- one row per (date, ticker) pair for every date
      present in the pre-processed data, sorted by date then ticker, with
      missing values replaced by 0.
    * ``list_date`` -- every calendar day (as a string) between the first and
      the last date of the pre-processed data.
    """
    raw_prices = YahooDownloader(
        start_date=start_date,
        end_date=end_date,
        ticker_list=stocks_tradable,
    ).fetch_data()

    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=tech_indicator_list,
        use_turbulence=False,
        user_defined_feature=False,
    )
    processed = engineer.preprocess_data(raw_prices)

    tickers = processed["tic"].unique().tolist()
    first_day = processed['date'].min()
    last_day = processed['date'].max()
    list_date = list(pd.date_range(first_day, last_day).astype(str))

    # Full (calendar day x ticker) grid; the left merge leaves NaN wherever a
    # ticker has no row on a given day.
    grid = pd.DataFrame(
        list(itertools.product(list_date, tickers)),
        columns=["date", "tic"],
    )
    merged = grid.merge(processed, on=["date", "tic"], how="left")

    # Keep only days that occur in the processed data, then order and fill.
    on_known_day = merged['date'].isin(processed['date'])
    processed_full = merged[on_known_day].sort_values(['date', 'tic']).fillna(0)
    return processed_full, list_date
예제 #2
0
    def raw_data_preprocess(
        prp_data_path,
        df,
        beg_date,
        end_date,
        tech_id_list,
    ):
        """Return the pre-processed DataFrame, using a pickle cache when present.

        If ``prp_data_path`` exists, the cached frame is loaded and ``df``,
        ``beg_date``, ``end_date`` and ``tech_id_list`` are ignored. Otherwise
        ``df`` is run through ``FeatureEngineer`` (technical indicators from
        ``tech_id_list`` plus turbulence), restricted to
        ``beg_date <= date < end_date``, sorted by date then ticker, re-indexed
        by the factorized date and written to ``prp_data_path``.

        Raises:
            AssertionError: if the resulting columns are not exactly the
                expected set, in the expected order.
        """
        if os.path.exists(prp_data_path):
            import pandas as pd
            # NOTE: unpickling executes arbitrary code; only load cache files
            # this program wrote itself.
            df = pd.read_pickle(prp_data_path)  # DataFrame of Pandas
        else:
            from finrl.preprocessing.preprocessors import FeatureEngineer
            fe = FeatureEngineer(
                use_technical_indicator=True,
                tech_indicator_list=tech_id_list,
                use_turbulence=True,
                user_defined_feature=False,
            )
            df = fe.preprocess_data(df)  # preprocess raw_df

            df = df[(df.date >= beg_date) & (df.date < end_date)]
            df = df.sort_values(["date", "tic"], ignore_index=True)
            # Rows sharing a date share an index value.
            df.index = df.date.factorize()[0]

            df.to_pickle(prp_data_path)

        print('| df.columns.values:', df.columns.values)
        expected_columns = [
            'date', 'open', 'high', 'low', 'close', 'volume', 'tic', 'day',
            'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30',
            'close_30_sma', 'close_60_sma', 'turbulence'
        ]
        # Compare plain lists: an element-wise NumPy comparison against a list
        # of a different length does not yield an iterable of booleans, so the
        # previous ``all(array == list)`` form failed with a TypeError or
        # ValueError instead of a clean assertion failure.
        assert list(df.columns) == expected_columns, (
            f"unexpected columns: {list(df.columns)}")
        return df
예제 #3
0
def get_feature_engineered_df(df):
    """Run ``FeatureEngineer`` on a copy of ``df`` and return the result.

    Technical indicators come from ``config.TECHNICAL_INDICATORS_LIST``;
    turbulence is enabled and user-defined features are disabled. The frame
    handed to ``FeatureEngineer`` is ``df.copy()``, not ``df`` itself.
    """
    engineer_options = dict(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    return FeatureEngineer(df.copy(), **engineer_options).preprocess_data()
예제 #4
0
def get_yahoo_data(start, end):
    """Download Dow 30 prices and return them with technical indicators.

    Only tickers that have a row on *every* date of the download are kept, so
    each date ends up with the same set of tickers. The filtered frame is
    sorted by date then ticker before and after feature engineering.

    Args:
        start: start date passed to ``YahooDownloader``.
        end: end date passed to ``YahooDownloader``.

    Returns:
        The frame produced by ``FeatureEngineer.preprocess_data`` (technical
        indicators from ``config.TECHNICAL_INDICATORS_LIST`` plus turbulence),
        sorted by ``['date', 'tic']`` with a fresh integer index.

    Raises:
        AssertionError: if some date does not contain exactly one row per
            retained ticker (e.g. duplicated (date, ticker) rows).
    """
    df = YahooDownloader(start_date=start,
                         end_date=end,
                         ticker_list=config.DOW_30_TICKER).fetch_data()

    # Intersect the ticker sets of all dates. ``None`` marks "no date seen
    # yet", so an intersection that becomes empty stays empty instead of being
    # silently re-seeded from the next date.
    common = None
    for _, group in df.groupby('date'):
        tickers = set(group.tic.unique())
        common = tickers if common is None else common & tickers
    if common is None:
        common = set()

    # Rows without a date never belonged to a date group, so drop them as well.
    keep = df['tic'].isin(common) & df['date'].notna()
    df = df[keep].sort_values(['date', 'tic'], ignore_index=True)

    rows_per_date = df.groupby('date').size()
    assert (rows_per_date == len(common)).all(), (
        "every date must contain exactly one row per retained ticker")

    fe = FeatureEngineer(use_technical_indicator=True,
                         tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
                         use_turbulence=True,
                         user_defined_feature=False)

    processed = fe.preprocess_data(df)
    # sort_values returns a new frame; the result has to be kept.
    return processed.sort_values(['date', 'tic'], ignore_index=True)
예제 #5
0
def load_stock_trading_data():
    """Return ``(train_df, eval_df)`` for the stock trading environment.

    Raw and processed frames are cached as pickles under ``./env/FinRL``.
    Each stage is loaded from its pickle when the file exists; otherwise the
    raw data is downloaded with ``YahooDownloader`` and/or processed with
    ``FeatureEngineer``, and the result is written to the cache.
    """
    from finrl.config import config

    cwd = './env/FinRL'
    raw_data_path = f'{cwd}/StockTradingEnv_raw_data.df'
    processed_data_path = f'{cwd}/StockTradingEnv_processed_data.df'
    os.makedirs(cwd, exist_ok=True)

    print("==============Start Fetching Data===========")
    if not os.path.exists(raw_data_path):
        from finrl.marketdata.yahoodownloader import YahooDownloader
        downloader = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.DOW_30_TICKER,
        )
        raw_df = downloader.fetch_data()
        raw_df.to_pickle(raw_data_path)
    else:
        raw_df = pd.read_pickle(raw_data_path)  # cached raw download
        print('| raw_df.columns.values:', raw_df.columns.values)

    print("==============Start Feature Engineering===========")
    if not os.path.exists(processed_data_path):
        from finrl.preprocessing.preprocessors import FeatureEngineer
        engineer = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=True,
            user_defined_feature=False,
        )
        processed_df = engineer.preprocess_data(raw_df)
        processed_df.to_pickle(processed_data_path)
    else:
        processed_df = pd.read_pickle(processed_data_path)  # cached features
        print('| processed_df.columns.values:', processed_df.columns.values)

    # Split into the training window and the evaluation (trading) window.
    from finrl.preprocessing.data import data_split
    train_df = data_split(processed_df, '2008-03-19', '2016-01-01')
    eval_df = data_split(processed_df, '2016-01-01', '2021-01-01')
    return train_df, eval_df
예제 #6
0
def main():
    """Feed 2020-12 trade data one step at a time to an online predictor.

    Downloads prices for ``stock_tickers``, merges sentiment scores, splits
    the data into a training and a trading window, builds the training and
    online trading environments, and then prints the action, states, next
    observation and rewards for each step that is fed in.
    """
    start_date, trade_start_date, end_date = (
        '2020-01-01', '2020-12-01', '2021-01-01')
    ticker_list = stock_tickers

    downloader = YahooDownloader(start_date=start_date,
                                 end_date=end_date,
                                 ticker_list=ticker_list)
    numerical_df = downloader.fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)

    train_data = data_split(initial_data, start_date, trade_start_date)
    trade_data = data_split(initial_data, trade_start_date, end_date)

    indicator_list = config.TECHNICAL_INDICATORS_LIST + ['sentiment']
    stock_dimension = len(trade_data.tic.unique())
    # 1 + 2 entries per stock + one entry per (indicator, stock) pair.
    state_space = 1 + stock_dimension * (2 + len(indicator_list))

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=indicator_list,
        action_space=stock_dimension,
        reward_scaling=1e-4,
        print_verbosity=5,
    )

    e_train_gym = StockTradingEnv(df=train_data, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()

    # The online environment is seeded with the rows at index 0.
    e_trade_gym = OnlineStockTradingEnv(trade_data.loc[0], **env_kwargs)

    training_agent = DRLAgent(env=env_train)
    # The model is obtained but not trained here (the training call was
    # commented out in the original code).
    model_a2c = training_agent.get_model("a2c")

    feature_engineer = FeatureEngineer()  # constructed but not used below
    online_stock_pred = OnlineStockPrediction(e_trade_gym, model_a2c)

    # NOTE(review): range() excludes its upper bound, so the rows at the
    # largest index are never fed -- confirm this is intended.
    last_index = trade_data.index.unique().max()
    for step in range(1, last_index):
        step_rows = trade_data.loc[step]
        print(step_rows)
        online_stock_pred.add_data(step_rows)
        action, states, next_obs, rewards = online_stock_pred.predict()
        print("Action:", action)
        print("States: ", states)
        print("Next observation: ", next_obs)
        print("Rewards: ", rewards)
예제 #7
0
파일: data.py 프로젝트: ruichengHan/FinRL
def prepare_data() -> (dict, pd.DataFrame):
    """Return ``(env_kwargs, processed)`` for building a trading environment.

    The processed frame comes from ``load_from_cache()``; when that returns
    ``None`` the data is downloaded for ``config.CURRENT_TICKER``, run through
    ``FeatureEngineer`` (technical indicators only, no turbulence) and passed
    to ``save()``.
    """
    processed = load_from_cache()
    if processed is None:
        print("==============Start Fetching Data===========")
        downloader = YahooDownloader(
            start_date=config.START_DATE,
            end_date=config.END_DATE,
            ticker_list=config.CURRENT_TICKER,
        )
        raw = downloader.fetch_data()

        print("==============Start Feature Engineering===========")
        engineer = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
            use_turbulence=False,
            user_defined_feature=False,
        )
        processed = engineer.preprocess_data(raw)
        save(processed)

    # State/action space sizes derived from the number of distinct tickers.
    stock_dimension = len(processed.tic.unique())
    indicators = config.TECHNICAL_INDICATORS_LIST
    # Same value as 2 * stocks + len(indicators) * stocks * 2.
    state_space = 2 * stock_dimension * (1 + len(indicators))

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.01,
        sell_cost_pct=0.01,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=indicators,
        action_space=stock_dimension,
        reward_scaling=1e-4,
    )
    return env_kwargs, processed
예제 #8
0
def main():
    """Command-line entry point: train an agent or download price data.

    ``--mode train`` delegates to ``finrl.autotrain.training.train_one``.

    ``--mode download_data`` reads the ticker universe from
    ``data/tom_dow_done_data.csv``, downloads each ticker with
    ``si.get_data``, runs ``FeatureEngineer`` on the combined frame and writes
    a time-stamped ``dow30_*.csv`` to ``config.DATA_SAVE_DIR``.

    Raises:
        ValueError: in ``download_data`` mode when the CSV lists no tickers.
    """
    initialize()
    parser = build_parser()
    options = parser.parse_args()

    if options.mode == "train":
        import finrl.autotrain.training

        finrl.autotrain.training.train_one()
    elif options.mode == "download_data":
        print('Download Data Begin')

        # The ticker universe is taken from a previously saved data set. The
        # earlier ``si.tickers_dow()`` call was dropped: its result was
        # overwritten before use, so it only cost a network round trip.
        dftmp = pd.read_csv('data/tom_dow_done_data.csv', index_col=0)
        dow_30 = dftmp.tic.unique()
        if len(dow_30) == 0:
            raise ValueError(
                "no tickers found in data/tom_dow_done_data.csv")

        # One concat instead of a chain of DataFrame.append calls (removed in
        # pandas 2.0 and quadratic in the number of tickers).
        price_data = {ticker: si.get_data(ticker) for ticker in dow_30}
        df = pd.concat(list(price_data.values()))
        df.reset_index(inplace=True)
        df = df.rename(columns={'index': 'date', 'ticker': 'tic'})

        fe = FeatureEngineer(use_technical_indicator=True,
                             use_turbulence=False,
                             user_defined_feature=False)

        df = fe.preprocess_data(df)
        now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        df.to_csv(config.DATA_SAVE_DIR + "/" + "dow30_" + now + ".csv",
                  index=False)
        print('Download Complete')
예제 #9
0
def train_one():
    """
    Train an A2C agent on Dow 30 data, trade with it and save the results.

    Writes the account-value history and the back-test statistics as CSV
    files into ``config.RESULTS_DIR``, both named with the run's timestamp.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(start_date=config.START_DATE,
                                 end_date=config.END_DATE,
                                 ticker_list=config.DOW_30_TICKER)
    df = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(df,
                               feature_number=5,
                               use_technical_indicator=True,
                               use_turbulence=True)
    df = engineer.preprocess_data()

    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    env_setup = EnvSetup(stock_dim=len(train.tic.unique()))
    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = dict(
        n_steps=5,
        ent_coef=0.005,
        learning_rate=0.0007,
        verbose=0,
        timesteps=100000,
    )
    model_a2c = agent.train_A2C(model_name="A2C_{}".format(now),
                                model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)

    df_account_value = DRLAgent.DRL_prediction(model=model_a2c,
                                               test_data=trade,
                                               test_env=env_trade,
                                               test_obs=obs_trade)
    account_csv = "./" + config.RESULTS_DIR + "/" + now + '.csv'
    df_account_value.to_csv(account_csv)

    print("==============Get Backtest Results===========")
    perf_stats_all = pd.DataFrame(BackTestStats(df_account_value))
    stats_csv = ("./" + config.RESULTS_DIR + "/perf_stats_all_" + now +
                 '.csv')
    perf_stats_all.to_csv(stats_csv)
예제 #10
0
def test_process_data():
    """Feed post-2020-12-01 data to ``DataProcessor`` one date at a time.

    Data before the cut-off date seeds the processor; for every later date the
    numerical and sentiment rows of that date are passed to
    ``process_data`` and the returned frame is printed.
    """
    start_date = '2020-11-01'
    end_date = '2021-01-01'
    cutoff = '2020-12-01'
    ticker_list = stock_tickers

    numerical_df = YahooDownloader(start_date=start_date,
                                   end_date=end_date,
                                   ticker_list=ticker_list).fetch_data()
    sentiment_df = generate_sentiment_scores(start_date, end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    trade_data = data_split(initial_data, start_date, cutoff)

    # Rows strictly after the cut-off are replayed day by day.
    numerical_feed_data = numerical_df[numerical_df.date > cutoff]
    sentiment_feed_data = sentiment_df[sentiment_df.date > cutoff]

    data_processor = DataProcessor(FeatureEngineer(), trade_data)
    for day in numerical_feed_data.date.unique():
        day_numerical = numerical_feed_data[numerical_feed_data.date == day]
        day_sentiment = sentiment_feed_data.loc[
            sentiment_feed_data.date == day]
        print(data_processor.process_data(day_numerical, day_sentiment))
예제 #11
0
def get_initial_data(numerical_df, sentiment_df, use_turbulence=False):
    """Pre-process price data and left-join sentiment scores onto it.

    Args:
        numerical_df: raw price frame passed to
            ``FeatureEngineer.preprocess_data``.
        sentiment_df: frame merged on the ``date`` and ``tic`` columns.
        use_turbulence: forwarded to ``FeatureEngineer``.

    Returns:
        The merged frame with every missing value replaced by 0, so that
        (date, ticker) pairs without a sentiment row get 0 instead of NaN.
    """
    fe = FeatureEngineer(use_turbulence=use_turbulence)
    numerical_df = fe.preprocess_data(numerical_df)
    df = numerical_df.merge(sentiment_df, on=["date", "tic"], how="left")
    # fillna returns a new frame; the result was previously discarded, which
    # left the NaNs from the left merge in the returned data.
    return df.fillna(0)
예제 #12
0
def main():
    """Build the training and testing data sets and pickle them to disk.

    Reads ``<DATA_SAVE_DIR>/<options.name>.csv``, adds technical indicators
    and turbulence, splits the result at ``config.START_TRADE_DATE`` and
    writes both parts as pickles to ``training.txt`` and ``testing.txt`` in
    ``config.DATA_SAVE_DIR``. Existing output files are removed first.
    """
    options = build_parser().parse_args()

    # Basic setup: silence all warnings.
    warnings.filterwarnings('ignore')

    data_dir = "./" + config.DATA_SAVE_DIR

    # Load the saved data in a pandas DataFrame.
    data_frame = pd.read_csv(data_dir + "/" + options.name + ".csv")
    print("Data Frame shape is: ", data_frame.shape)
    print("Data Frame format is following: \n\n", data_frame.head())

    # The stockstats technical indicator column names live in config.py.
    tech_indicator_list = config.TECHNICAL_INDICATORS_LIST
    print("Technical Indicators that are going to be calculated: ",
          tech_indicator_list)

    feature_engineering = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=tech_indicator_list,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = feature_engineering.preprocess_data(data_frame)

    preview = processed.sort_values(['date', 'tic'], ignore_index=True)
    print(preview.head(10))

    training_set = data_split(processed, config.START_DATE,
                              config.START_TRADE_DATE)
    testing_set = data_split(processed, config.START_TRADE_DATE,
                             config.END_DATE)
    print("Size of training set: ", len(training_set))
    print("Size of testing set: ", len(testing_set))

    print("Training set format:\n\n", training_set.head())

    print("Testing set format: \n\n", testing_set.head())

    stock_dimension = len(training_set.tic.unique())
    state_space = 1 + stock_dimension * (2 + len(tech_indicator_list))
    print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

    # Save data to file, both training and trading. Stale files are removed
    # (and reported) before the new ones are written.
    path_training = data_dir + "/training.txt"
    path_testing = data_dir + "/testing.txt"
    for label, path in (("training", path_training),
                        ("testing", path_testing)):
        if os.path.exists(path):
            os.remove(path)
            print(f"The {label} data file deleted")
        else:
            print(f"The {label} data file does not exist")

    for path, data_set in ((path_training, training_set),
                           (path_testing, testing_set)):
        with open(path, "wb") as f:
            pickle.dump(data_set, f, pickle.HIGHEST_PROTOCOL)

    print(
        "Successfuly completed the task of creation of test and training data files."
    )
## user can add more technical indicators
## check https://github.com/jealous/stockstats for different names
# Build a new list (rather than extending in place) so the list object that
# ``tech_indicator_list`` referred to before this line is left untouched.
tech_indicator_list=tech_indicator_list+['kdjk','open_2_sma','boll','close_10.0_le_5_c','wr_10','dma','trix']
print(tech_indicator_list)


# <a id='3.2'></a>
# ## 4.2 Perform Feature Engineering

# In[14]:


# Configure the feature engineer: technical indicators from the extended list
# above, no turbulence index and no user-defined features.
fe = FeatureEngineer(
                    use_technical_indicator=True,
                    tech_indicator_list = tech_indicator_list,
                    use_turbulence=False,
                    user_defined_feature = False)

# Replace the raw frame with its pre-processed version.
data_df = fe.preprocess_data(data_df)


# In[15]:


# Notebook cell: display the first rows of the processed frame.
data_df.head()


# <a id='4'></a>
# # Part 5. Build Environment
# Considering the stochastic and interactive nature of automated stock trading tasks, a financial task is modeled as a **Markov Decision Process (MDP)** problem. The training process involves observing stock price changes, taking an action, and calculating the reward so that the agent adjusts its strategy accordingly. By interacting with the environment, the trading agent derives a trading strategy that maximizes rewards over time.
예제 #14
0
def generate_data(rollouts, data_dir, noise_type): # pylint: disable=R0914
    """Generate random-policy rollouts on an AAPL trading environment.

    Downloads AAPL prices for 2009-2021, adds technical indicators and
    turbulence, builds a ``StockTradingEnv`` on the 2009-2019 window and runs
    ``rollouts`` episodes with randomly sampled actions. Each episode is
    saved to ``<data_dir>/rollout_<i>.npz`` with the arrays ``observations``,
    ``rewards``, ``actions`` and ``terminals``.

    Args:
        rollouts: number of episodes to generate.
        data_dir: existing directory that receives the ``.npz`` files.
        noise_type: ``'white'`` samples every action independently from the
            action space; ``'brown'`` uses ``sample_continuous_policy``.

    Raises:
        AssertionError: if ``data_dir`` does not exist.
        ValueError: if ``noise_type`` is neither ``'white'`` nor ``'brown'``.
    """
    assert exists(data_dir), "The data directory does not exist..."
    # Fail before the download: an unknown noise type used to surface only
    # inside the rollout loop, as a NameError on the never-assigned actions.
    if noise_type not in ('white', 'brown'):
        raise ValueError(
            "noise_type must be 'white' or 'brown', got {!r}".format(
                noise_type))

    df = YahooDownloader(start_date='2009-01-01',
                         end_date='2021-01-01',
                         ticker_list=['AAPL']).fetch_data()

    # sort_values returns a new frame, so the result must be assigned.
    df = df.sort_values(['date', 'tic'], ignore_index=True)

    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False)

    processed = fe.preprocess_data(df)

    # Full (calendar day x ticker) grid, restricted to days that occur in the
    # processed data; gaps left by the merge are filled with 0.
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(),
                                   processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))

    processed_full = pd.DataFrame(
        combination, columns=["date", "tic"]).merge(
            processed, on=["date", "tic"], how="left")
    processed_full = processed_full[
        processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.fillna(0)
    processed_full = processed_full.sort_values(['date', 'tic'],
                                                ignore_index=True)

    train = data_split(processed_full, '2009-01-01', '2019-01-01')
    # The trade split is computed but not used below.
    trade = data_split(processed_full, '2019-01-01', '2021-01-01')

    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "transaction_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4,
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env, _ = e_train_gym.get_sb_env()

    # Number of actions sampled up front for every rollout.
    # NOTE(review): an episode longer than this would raise IndexError below,
    # and the saved ``actions`` array always holds all ``seq_len`` samples,
    # not only the ones actually taken -- confirm that consumers expect this.
    seq_len = 10000

    for i in range(rollouts):
        env.reset()

        if noise_type == 'white':
            a_rollout = [env.action_space.sample() for _ in range(seq_len)]
        else:  # 'brown'
            a_rollout = sample_continuous_policy(env.action_space, seq_len,
                                                 1. / 50)

        s_rollout = []
        r_rollout = []
        d_rollout = []

        t = 0
        while True:
            action = a_rollout[t]
            t += 1

            s, r, done, _ = env.step(action)
            s_rollout.append(s)
            r_rollout.append(r)
            d_rollout.append(done)
            if done:
                print("> End of rollout {}, {} frames...".format(
                    i, len(s_rollout)))
                np.savez(join(data_dir, 'rollout_{}'.format(i)),
                         observations=np.array(s_rollout),
                         rewards=np.array(r_rollout),
                         actions=np.array(a_rollout),
                         terminals=np.array(d_rollout))
                break
예제 #15
0
def train_one(fetch=False):
    """
    Train a SAC agent, trade with it and write the back-test artefacts.

    Args:
        fetch: when true the data comes from ``fetch_and_store()``, otherwise
            from ``load()``.

    The run writes account values, actions, transactions, positions and
    performance statistics as time-stamped CSV files into
    ``config.RESULTS_DIR`` and finally calls ``backtest_plot``.

    Raises:
        AssertionError: if the dates do not all have the same number of
            ticker rows.
    """
    df = fetch_and_store() if fetch else load()

    # Every date must carry the same number of ticker rows.
    counts = df[['date', 'tic']].groupby(['date']).count().tic
    assert counts.min() == counts.max()

    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    start_date, trade_date, end_date = calculate_split(df,
                                                       start=config.START_DATE)
    print(start_date, trade_date, end_date)
    train = data_split(processed, start_date, trade_date)
    trade = data_split(processed, trade_date, end_date)

    print(
        f'\n******\nRunning from {start_date} to {end_date} for:\n{", ".join(config.CRYPTO_TICKER)}\n******\n'
    )

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + (2 * stock_dimension) +
                   (len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension))

    env_kwargs = {
        "hmax": 100,
        "initial_amount": 100000,
        "buy_cost_pct": 0.0026,
        "sell_cost_pct": 0.0026,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)

    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  make_plots=True,
                                  **env_kwargs)

    env_train, _ = e_train_gym.get_sb_env()
    # The wrapped trade env and its first observation are not used below; the
    # call is kept because get_sb_env() may also initialise the environment.
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime(config.DATETIME_FMT)

    model_sac = agent.get_model("sac")
    trained_sac = agent.train_model(model=model_sac,
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv(
        f"./{config.RESULTS_DIR}/df_account_value_{now}.csv")
    df_actions.to_csv(f"./{config.RESULTS_DIR}/df_actions_{now}.csv")

    # Transactions recorded by the trade environment, indexed by UTC date.
    df_txns = pd.DataFrame(e_trade_gym.transactions,
                           columns=['date', 'amount', 'price', 'symbol'])
    df_txns = df_txns.set_index(pd.DatetimeIndex(df_txns['date'], tz=pytz.utc))
    df_txns.to_csv(f'./{config.RESULTS_DIR}/df_txns_{now}.csv')

    # Positions: cash plus one column per ticker, indexed by UTC date.
    df_positions = pd.DataFrame(e_trade_gym.positions,
                                columns=['date', 'cash'] +
                                config.CRYPTO_TICKER)
    df_positions = df_positions.set_index(
        pd.DatetimeIndex(df_positions['date'],
                         tz=pytz.utc)).drop(columns=['date'])
    # Cast every position column to float64. The previous code assigned the
    # whole converted frame to the single 'cash' column, which pandas rejects
    # when the frame has more than one column.
    df_positions = df_positions.astype(np.float64)
    df_positions.to_csv(f'./{config.RESULTS_DIR}/df_positions_{now}.csv')

    print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value,
                                    transactions=df_txns,
                                    positions=df_positions)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv(f"./{config.RESULTS_DIR}/perf_stats_all_{now}.csv")

    backtest_plot(df_account_value,
                  baseline_start=trade_date,
                  baseline_end=end_date,
                  positions=df_positions,
                  transactions=df_txns)
예제 #16
0
def train_one():
    """
    Train a SAC agent on Dow 30 data, trade with it and save the results.

    Account values, actions and back-test statistics are written as
    time-stamped CSV files into ``config.RESULTS_DIR``.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    )
    df = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    processed = engineer.preprocess_data(df)

    # Full (calendar day x ticker) grid, restricted to days that occur in the
    # processed data; gaps left by the merge are filled with 0.
    list_ticker = processed["tic"].unique().tolist()
    first_day = processed['date'].min()
    last_day = processed['date'].max()
    list_date = list(pd.date_range(first_day, last_day).astype(str))
    grid = pd.DataFrame(list(itertools.product(list_date, list_ticker)),
                        columns=["date", "tic"])
    merged = grid.merge(processed, on=["date", "tic"], how="left")
    on_known_day = merged['date'].isin(processed['date'])
    processed_full = merged[on_known_day].sort_values(['date',
                                                       'tic']).fillna(0)

    # Training & Trading data split
    train = data_split(processed_full, config.START_DATE,
                       config.START_TRADE_DATE)
    trade = data_split(processed_full, config.START_TRADE_DATE,
                       config.END_DATE)

    # State/action space sizes derived from the number of distinct tickers.
    stock_dimension = len(train.tic.unique())
    indicators = config.TECHNICAL_INDICATORS_LIST
    state_space = 1 + stock_dimension * (2 + len(indicators))

    env_kwargs = dict(
        hmax=100,
        initial_amount=1000000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        state_space=state_space,
        stock_dim=stock_dimension,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        action_space=stock_dimension,
        reward_scaling=1e-4,
    )

    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    trained_sac = agent.train_model(model=agent.get_model("sac"),
                                    tb_log_name="sac",
                                    total_timesteps=80000)

    print("==============Start Trading===========")
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        model=trained_sac, environment=e_trade_gym)

    account_csv = ("./" + config.RESULTS_DIR + "/df_account_value_" + now +
                   ".csv")
    df_account_value.to_csv(account_csv)
    actions_csv = "./" + config.RESULTS_DIR + "/df_actions_" + now + ".csv"
    df_actions.to_csv(actions_csv)

    print("==============Get Backtest Results===========")
    perf_stats_all = pd.DataFrame(backtest_stats(df_account_value))
    stats_csv = ("./" + config.RESULTS_DIR + "/perf_stats_all_" + now +
                 ".csv")
    perf_stats_all.to_csv(stats_csv)
예제 #17
0
def train_one():
    """
    Train an A2C agent on S&P 500 data, trade with it and save the results.

    Account values, actions and back-test statistics are written as
    time-stamped CSV files into ``config.RESULTS_DIR``.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(start_date=config.START_DATE,
                                 end_date=config.END_DATE,
                                 ticker_list=config.SP_500_TICKER)
    df = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(df,
                               use_technical_indicator=True,
                               use_turbulence=True)
    df = engineer.preprocess_data()

    # Training & Trade data split
    train = data_split(df, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(df, config.START_TRADE_DATE, config.END_DATE)

    # NOTE: a StandardScaler-based normalisation of the feature columns was
    # present here only as commented-out code; no normalisation is applied.

    # State/action space sizes derived from the number of distinct tickers.
    stock_dimension = len(train.tic.unique())
    state_space = 1 + stock_dimension * (
        2 + len(config.TECHNICAL_INDICATORS_LIST))

    env_setup = EnvSetup(stock_dim=stock_dimension,
                         state_space=state_space,
                         hmax=100,
                         initial_amount=3000,
                         transaction_cost_pct=0.001)
    env_train = env_setup.create_env_training(data=train,
                                              env_class=StockEnvTrain)
    agent = DRLAgent(env=env_train)

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')
    a2c_params_tuning = dict(
        n_steps=5,
        ent_coef=0.005,
        learning_rate=0.0007,
        verbose=0,
        timesteps=80000,
    )
    model = agent.train_A2C(model_name="A2C_{}".format(now),
                            model_params=a2c_params_tuning)

    print("==============Start Trading===========")
    env_trade, obs_trade = env_setup.create_env_trading(
        data=trade, env_class=StockEnvTrade, turbulence_threshold=250)

    df_account_value, df_actions = DRLAgent.DRL_prediction(model=model,
                                                           test_data=trade,
                                                           test_env=env_trade,
                                                           test_obs=obs_trade)
    account_csv = ("./" + config.RESULTS_DIR + "/df_account_value_" + now +
                   '.csv')
    df_account_value.to_csv(account_csv)
    actions_csv = "./" + config.RESULTS_DIR + "/df_actions_" + now + '.csv'
    df_actions.to_csv(actions_csv)

    print("==============Get Backtest Results===========")
    perf_stats_all = pd.DataFrame(BackTestStats(df_account_value))
    stats_csv = ("./" + config.RESULTS_DIR + "/perf_stats_all_" + now +
                 '.csv')
    perf_stats_all.to_csv(stats_csv)
예제 #18
0
파일: data_utils.py 프로젝트: nmarzz/ai-fin
def _download_and_cache(stock_tickers, data_path):
    """Download Yahoo price data for ``stock_tickers``, add features, cache to CSV.

    The download window is fixed to 2000-01-01 .. 2021-01-01. After feature
    engineering, the frame is expanded to a full (date, ticker) grid restricted
    to dates that actually occur in the data, missing cells are filled with 0,
    and the result is written to ``data_path``.
    """
    print('Getting Data: ')
    df = YahooDownloader(start_date='2000-01-01',
                         end_date='2021-01-01',
                         ticker_list=stock_tickers).fetch_data()

    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False)

    print('Adding Indicators')
    processed = fe.preprocess_data(df)

    # Build every (calendar date, ticker) pair, then keep only the dates that
    # are present in the processed data, so each remaining date has one row
    # per ticker.
    list_ticker = processed["tic"].unique().tolist()
    list_date = list(pd.date_range(processed['date'].min(),
                                   processed['date'].max()).astype(str))
    combination = list(itertools.product(list_date, list_ticker))

    processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
        processed, on=["date", "tic"], how="left")
    processed_full = processed_full[processed_full['date'].isin(processed['date'])]
    processed_full = processed_full.sort_values(['date', 'tic'])
    processed_full = processed_full.fillna(0)

    # Persist the gap-filled frame. The previous code built it and then wrote
    # the un-filled `processed` frame instead, discarding the work above.
    processed_full.to_csv(data_path, index=False)


def get_dataset(datadir,data_type,start_date,end_date):
    """Load the cached dataset for ``data_type`` and return the requested date slice.

    The dataset is read from ``<datadir>/<data_type>.csv``. When that file does
    not exist, 'dow29' and 'nas29' are downloaded from Yahoo Finance, processed
    and cached there first; every other data type must already be on disk.

    Args:
        datadir: directory holding the cached CSV files.
        data_type: dataset name; must be listed in ``config.SUPPORTED_DATA``.
        start_date: start of the requested period, passed to ``data_split``.
        end_date: end of the requested period, passed to ``data_split``.

    Returns:
        Whatever ``data_split(full_df, start_date, end_date)`` returns.

    Raises:
        ValueError: if ``data_type`` is unsupported, or its CSV is missing and
            cannot be downloaded automatically.
    """
    if data_type not in config.SUPPORTED_DATA:
        raise ValueError('Market type not supported')

    data_path = os.path.join(datadir, data_type + '.csv')

    if not os.path.exists(data_path):
        # If we don't have the data, we can download it from Yahoo Finance
        # for the two ticker-list based datasets.
        if data_type == 'dow29':
            _download_and_cache(config.DOW_30_TICKER_MINUS_VISA, data_path)
        elif data_type == 'nas29':
            _download_and_cache(config.NAS_29_TICKER, data_path)
        elif data_type == 'dow290':
            raise ValueError('Need to add dow29 with zeros crossings to data directory')
        elif data_type == 'dow29w0':
            raise ValueError('Need to add dow29 with OUT zeros crossings to data directory')
        else:
            raise ValueError('Need to add crypto data to data directory')

    # Load and subset data
    full_df = pd.read_csv(data_path)

    max_date = full_df['date'].max()
    min_date = full_df['date'].min()

    # Only warn when the request falls outside the cached range; the dates are
    # passed to data_split unchanged.
    if not (min_date <= start_date):
        warnings.warn('Earliest possible start date is {}: You have chosen {}. The later date will be used'.format(min_date,start_date))
    if not (max_date >= end_date):
        warnings.warn('Latest possible end date is {}: You have chosen {}. The earlier date will be used'.format(max_date,end_date))

    return data_split(full_df, start_date, end_date)
예제 #19
0
def train_one():
    """
    Train (or load) a SAC agent on FXAIX, trade with it, and save results.

    Steps:
      1. Download FXAIX prices for config.START_DATE .. config.END_DATE and
         run feature engineering.
      2. Split into a training window (up to config.START_TRADE_DATE) and a
         trading window (from config.START_TRADE_DATE).
      3. Ask on stdin whether to train: "1" trains a new SAC model and saves
         it; anything else loads the hard-coded pre-trained model.
      4. Run the model on the trading environment and write account values,
         actions and performance stats as CSV files under
         "../" + config.RESULTS_DIR.
      5. Save a two-panel plot (account value coloured by action, and the
         closing price) under "../" + config.RESULTS_DIR + "/plots/".
    """
    print("==============Start Fetching Data===========")
    df = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=['FXAIX'],
    ).fetch_data()
    print("==============Start Feature Engineering===========")
    fe = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )

    processed = fe.preprocess_data(df)

    # Training & Trading data split
    train = data_split(processed, config.START_DATE, config.START_TRADE_DATE)
    trade = data_split(processed, config.START_TRADE_DATE, config.END_DATE)

    # calculate state action space
    stock_dimension = len(train.tic.unique())
    state_space = (1 + 2 * stock_dimension +
                   len(config.TECHNICAL_INDICATORS_LIST) * stock_dimension)
    env_kwargs = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": state_space,
        "stock_dim": stock_dimension,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": stock_dimension,
        "reward_scaling": 1e-4
    }
    e_train_gym = StockTradingEnv(df=train, **env_kwargs)
    e_trade_gym = StockTradingEnv(df=trade,
                                  turbulence_threshold=250,
                                  **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    env_trade, obs_trade = e_trade_gym.get_sb_env()

    agent = DRLAgent(env=env_train)

    # Ticker symbol of the first downloaded row; used in every output name.
    ticker = df.tic[0]

    print("==============Model Training===========")
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")
    user_input = input('train model? 1 train 0 don\'t train')
    # input() always returns a str, so compare against the string "1".
    # Comparing against the int 1 was always False and made the training
    # branch unreachable.
    if user_input.strip() == "1":
        model_sac = agent.get_model("sac")
        trained_sac = agent.train_model(model=model_sac,
                                        tb_log_name="sac",
                                        total_timesteps=8000)
        trained_sac.save("../models/sac_8k" + ticker + "_frl")
    else:
        # Fall back to a previously saved model at a hard-coded path.
        trained_sac = SAC.load('../models/sac_80k_msft_working')
    print("==============Start Trading===========")
    df_account_value, df_actions = DRLAgent.DRL_prediction(
        trained_sac, e_trade_gym)
    df_account_value.to_csv("../" + config.RESULTS_DIR +
                            "/SAC_df_account_value_" + ticker + "_" + now +
                            ".csv")
    df_actions.to_csv("../" + config.RESULTS_DIR + "/SAC_df_actions_" +
                      ticker + "_" + now + ".csv")

    # print("==============Get Backtest Results===========")
    perf_stats_all = backtest_stats(df_account_value)
    perf_stats_all = pd.DataFrame(perf_stats_all)
    perf_stats_all.to_csv("../" + config.RESULTS_DIR + "/SAC_perf_stats_all_" +
                          ticker + "_" + now + ".csv")

    # plot acc value
    actions = df_actions['actions']
    x = np.arange(0, df_account_value['account_value'].shape[0])
    y = df_account_value['account_value']

    # Turn the (x, y) curve into consecutive line segments so each segment
    # can be coloured individually.
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    fig, axs = plt.subplots(2, 1, sharex=True, sharey=False)

    # Use a boundary norm: three colour bins over the action value, labelled
    # Sell / Hold / Buy in the legend below.
    cmap = ListedColormap(['r', 'g', 'b'])
    norm = BoundaryNorm([-100, -0.1, 0.1, 100], cmap.N)
    lc = LineCollection(segments, cmap=cmap, norm=norm)
    lc.set_array(actions)
    lc.set_linewidth(2)
    line = axs[0].add_collection(lc)

    axs[1].set_xlabel('Trading Day (' + 'From ' + config.START_TRADE_DATE +
                      " to " + config.END_DATE + ')')
    axs[0].set_ylabel('Account Value (10000 of USD)')
    axs[0].set_title("Trading Test on " + ticker)

    # add_collection does not autoscale, so set the limits explicitly.
    axs[0].set_xlim(x.min(), x.max())
    axs[0].set_ylim(y.min(), y.max())

    custom_lines = [
        Line2D([0], [0], color=cmap(0.), lw=4),
        Line2D([0], [0], color=cmap(.5), lw=4),
        Line2D([0], [0], color=cmap(1.), lw=4)
    ]

    axs[0].legend(custom_lines, ['Sell', 'Hold', 'Buy'])

    # plot stock value
    # NOTE(review): these pyplot calls act on the current axes, presumably
    # axs[1] (the last subplot created) - confirm.
    tx = np.arange(0, df_account_value['account_value'].shape[0])
    ty = trade['close']
    plt.ylabel('Price (USD)')
    plt.title(ticker + " Closing Price")
    plt.plot(tx, ty)

    plt.savefig("../" + config.RESULTS_DIR + "/plots/"
                "SAC_plot_" + ticker + "_" + now + ".png")
예제 #20
0
                        metavar='<trade_date>',
                        help='trading start date')
    args = parser.parse_args()
    consumer = KafkaConsumer(args.topic, auto_offset_reset='latest', \
            bootstrap_servers=args.hosts, api_version=(0, 10), consumer_timeout_ms=1000)

    producer = KafkaProducer(bootstrap_servers=args.hosts, api_version=(0, 10))
    # data initialization
    tday = datetime.date.today()
    yday = tday - datetime.timedelta(days=1)
    fmt = "%Y-%m-%d"
    numerical_df = YahooDownloader(args.start_date, args.end_date,
                                   config.stock_tickers).fetch_data()
    sentiment_df = generate_sentiment_scores(args.start_date, args.end_date)
    initial_data = get_initial_data(numerical_df, sentiment_df)
    data_processor = DataProcessor(FeatureEngineer(), initial_data)

    new_numerical = YahooDownloader(datetime.datetime.strftime(yday, fmt),
                                    datetime.datetime.strftime(tday, fmt),
                                    config.stock_tickers).fetch_data()
    new_sentiment = generate_sentiment_scores(
        datetime.datetime.strftime(yday, fmt),
        datetime.datetime.strftime(yday, fmt))
    # set up model to train on initial data
    load_path = "./trained_models/a2c_2019-2020_80k.zip"
    model = setup_model(initial_data)

    while consumer is None:
        sleep(20)

    event = Event()
예제 #21
0

# In[8]:


# Download price data for the Dow 30 tickers over the configured date window.
df = YahooDownloader(
    start_date=config.START_DATE,
    end_date=config.END_DATE,
    ticker_list=config.DOW_30_TICKER,
).fetch_data()


# # Part 4: Preprocess Data
# In[9]:

# Feature engineering with technical indicators and turbulence enabled.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
    use_turbulence=True,
    user_defined_feature=False,
)
processed = fe.preprocess_data(df)

# %% Show turbulence
# If this cell errors, open VSCode Settings (JSON) and set
# "terminal.integrated.inheritEnv" to true.

import matplotlib.pyplot as plt
import pandas as pd

# Keep only the JPM rows; `df` and `plotdf` are both rebound to that frame.
plotdf = processed[processed['tic'] == 'JPM']
df = plotdf
df.plot(x="date", y=["turbulence", "close"])
plt.show()
예제 #22
0
def train_one():
    """
    Train a SAC agent on the Dow 30 tickers and trade with it.

    Downloads prices for config.START_DATE .. config.END_DATE, runs feature
    engineering, splits the data at config.START_TRADE_DATE, trains SAC for
    80000 timesteps, runs the model on the trading window and writes account
    values, actions and performance stats as CSV files under
    "./" + config.RESULTS_DIR.
    """
    print("==============Start Fetching Data===========")
    downloader = YahooDownloader(
        start_date=config.START_DATE,
        end_date=config.END_DATE,
        ticker_list=config.DOW_30_TICKER,
    )
    raw_prices = downloader.fetch_data()

    print("==============Start Feature Engineering===========")
    engineer = FeatureEngineer(
        use_technical_indicator=True,
        tech_indicator_list=config.TECHNICAL_INDICATORS_LIST,
        use_turbulence=True,
        user_defined_feature=False,
    )
    features = engineer.preprocess_data(raw_prices)

    # Split at the trade start date: earlier rows train, later rows trade.
    train_df = data_split(features, config.START_DATE,
                          config.START_TRADE_DATE)
    trade_df = data_split(features, config.START_TRADE_DATE, config.END_DATE)

    # State size: 1, plus 2 entries per stock, plus one entry per
    # (indicator, stock) pair.
    n_stocks = len(train_df.tic.unique())
    n_indicators = len(config.TECHNICAL_INDICATORS_LIST)
    obs_size = 1 + 2 * n_stocks + n_indicators * n_stocks

    shared_env_args = {
        "hmax": 100,
        "initial_amount": 1000000,
        "buy_cost_pct": 0.001,
        "sell_cost_pct": 0.001,
        "state_space": obs_size,
        "stock_dim": n_stocks,
        "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST,
        "action_space": n_stocks,
        "reward_scaling": 1e-4,
    }

    train_gym = StockTradingEnv(df=train_df, **shared_env_args)
    # Only the trading environment is given a turbulence threshold.
    trade_gym = StockTradingEnv(
        df=trade_df,
        turbulence_threshold=250,
        **shared_env_args,
    )
    train_env, _ = train_gym.get_sb_env()
    trade_env, trade_obs = trade_gym.get_sb_env()

    agent = DRLAgent(env=train_env)

    print("==============Model Training===========")
    # Timestamp taken before training; it tags every output file name.
    now = datetime.datetime.now().strftime("%Y%m%d-%Hh%M")

    sac_model = agent.train_model(
        model=agent.get_model("sac"),
        tb_log_name="sac",
        total_timesteps=80000,
    )

    print("==============Start Trading===========")
    account_values, trade_actions = DRLAgent.DRL_prediction(
        model=sac_model,
        test_data=trade_df,
        test_env=trade_env,
        test_obs=trade_obs,
    )
    results_root = "./" + config.RESULTS_DIR
    account_values.to_csv(results_root + "/df_account_value_" + now + ".csv")
    trade_actions.to_csv(results_root + "/df_actions_" + now + ".csv")

    print("==============Get Backtest Results===========")
    stats_frame = pd.DataFrame(BackTestStats(account_values))
    stats_frame.to_csv(results_root + "/perf_stats_all_" + now + ".csv")