Example No. 1
def run(learning_steps=4300,
        verbose=0,
        n_steps=20,
        gamma=0.99,
        learning_rate=7e-4,
        ent_coef=0.01,
        tensorboard_log="tensorboard"):
    global inner_env
    inner_env = gym.make(
        'gym_threshold:extended-state-semi-fixed-end-not-adapted-v0')
    env = DummyVecEnv([lambda: inner_env])

    model = ACER(MlpPolicy,
                 env,
                 verbose=verbose,
                 n_steps=n_steps,
                 gamma=gamma,
                 ent_coef=ent_coef,
                 learning_rate=learning_rate,
                 tensorboard_log=tensorboard_log)
    model.learn(total_timesteps=learning_steps,
                tb_log_name=os.path.splitext(os.path.basename(__file__))[0],
                callback=tensorboard_callback)

    env.close()
Example No. 2
def train(env_id, num_timesteps, seed, policy, lr_schedule, num_cpu):
    """
    train an ACER model on atari

    :param env_id: (str) Environment ID
    :param num_timesteps: (int) The total number of samples
    :param seed: (int) The initial seed for training
    :param policy: (A2CPolicy) The policy model to use (MLP, CNN, LSTM, ...)
    :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant',
                                 'double_linear_con', 'middle_drop' or 'double_middle_drop')
    :param num_cpu: (int) The number of cpu to train on
    """
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = CnnLstmPolicy
    else:
        warnings.warn("Policy {} not implemented".format(policy))
        return

    model = ACER(policy_fn, env, lr_schedule=lr_schedule, buffer_size=5000)
    model.learn(total_timesteps=int(num_timesteps * 1.1), seed=seed)
    env.close()
    # Free memory
    del model
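
A minimal sketch of how the helper above might be invoked; the environment ID, timestep budget, and CPU count are illustrative placeholders, not values from the original project.

# Hypothetical invocation of the train() helper defined above.
train(env_id='PongNoFrameskip-v4',
      num_timesteps=int(1e6),
      seed=0,
      policy='cnn',
      lr_schedule='constant',
      num_cpu=4)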
Example No. 3
def acer(env_id, log_dir, timesteps):
    # Create log dir
    os.makedirs(log_dir, exist_ok=True)

    # Create and wrap the environment
    env = gym.make(env_id)
    env = Monitor(env, log_dir, allow_early_resets=True)
    env = DummyVecEnv([lambda: env])

    model = ACER(MlpPolicy, env, verbose=0)
    # Train the agent
    print("Beginning training episodes with ACER.")
    model.learn(total_timesteps=timesteps)

    env.close()
Example No. 4
def evaluate_policy(model,
                    eval_data,
                    runs_per_env: int,
                    n_vars: int,
                    episode_length: int,
                    display: bool,
                    printing: bool,
                    wrapped_env: bool = False) -> np.ndarray:
    if type(model) == str:
        model = ACER.load(model)
    differences = []
    for fcm in eval_data:
        target_graph = CausalGraphGenerator.create_graph_from_fcm(fcm)

        for run in range(runs_per_env):
            predicted_graph = apply_policy(model=model,
                                           test_env=fcm,
                                           n_vars=n_vars,
                                           episode_length=episode_length,
                                           display=display,
                                           env_type='Gauss',
                                           printing=printing,
                                           wrapped_env=wrapped_env)

            difference = directed_shd(predicted_graph, target_graph)
            differences.append(difference)
            print('.')

    differences = np.array(differences)
    return differences
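
For context, a hedged sketch of how the returned SHD array might be consumed; the model path, dataset variable, and settings are placeholders.

# Hypothetical call; 'acer_causal_model.zip' and eval_data are placeholders.
diffs = evaluate_policy('acer_causal_model.zip', eval_data, runs_per_env=3,
                        n_vars=6, episode_length=30, display=False,
                        printing=False)
print('mean SHD:', diffs.mean(), 'std:', diffs.std())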
Example No. 5
def NewPotential(current_window, algorithm='PPO'):

    # Determine the pretrained agent
    if algorithm == 'A2C':
        model = A2C.load("pretrained_A2C")
    elif algorithm == 'PPO':
        model = PPO2.load("pretrained_PPO")
    elif algorithm == 'ACKTR':
        model = ACKTR.load("pretrained_ACKTR")
    elif algorithm == 'ACER':
        model = ACER.load("pretrained_ACER")
    else:
        raise ValueError("%s is not a valid algorithm." % algorithm)

    if len(current_window) != model.observation_space.shape[0]:
        raise ValueError("%s is does not match the model's window size." %
                         len(current_window))

    action, _states = model.predict(current_window, deterministic=False)

    voltages = np.linspace(0, 1, num=model.action_space.n)
    if action >= 0 and action <= model.action_space.n - 1:
        voltage = voltages[action]
    else:
        raise ValueError(
            "Received invalid action={} which is not part of the action space".
            format(action))

    return voltage
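
As a concrete illustration of the action-to-voltage mapping above: with a discrete action space of size 5, np.linspace produces an evenly spaced grid, so action 3 maps to a voltage of 0.75.

import numpy as np

voltages = np.linspace(0, 1, num=5)  # array([0.  , 0.25, 0.5 , 0.75, 1.  ])
voltage = voltages[3]                # 0.75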
Example No. 6
def test_action_mask_run_acer(vec_env, policy, env_class):
    env = vec_env([env_class])

    model = ACER(policy, env, verbose=0)

    obs, done, action_masks = env.reset(), [False], []
    while not done[0]:
        action, _states = model.predict(obs, action_mask=action_masks)
        obs, _, done, infos = env.step(action)

        action_masks.clear()
        for info in infos:
            env_action_mask = info.get('action_mask')
            action_masks.append(env_action_mask)

    env.close()
Example No. 7
def get_acer(vec_env=None,
             policy='CnnPolicy',
             learning_rate=7e-4,
             n_steps=20,
             max_grad_norm=10,
             lr_schedule='linear',
             buffer_size=5000,
             replay_start=1000) -> ACER:
    """
    Default parameter values are taken from stable_baselines.acer.acer_simple.py
    """
    if vec_env is None:
        vec_env = create_training_env(1)
    return ACER(policy=policy,
                env=vec_env,
                gamma=0.99,
                n_steps=n_steps,
                num_procs=None,
                q_coef=0.5,
                ent_coef=0.01,
                max_grad_norm=max_grad_norm,
                learning_rate=learning_rate,
                lr_schedule=lr_schedule,
                rprop_alpha=0.99,
                rprop_epsilon=1e-5,
                buffer_size=buffer_size,
                replay_ratio=4,
                replay_start=replay_start,
                correction_term=10.0,
                trust_region=True,
                alpha=0.99,
                delta=1,
                verbose=2)
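
A hedged usage sketch for the factory above; the timestep budget and save path are illustrative, and create_training_env is assumed to be available in the same module.

# Hypothetical usage of get_acer(); the values below are placeholders.
model = get_acer(policy='CnnPolicy', lr_schedule='constant')
model.learn(total_timesteps=100000)
model.save('acer_from_factory')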
Example No. 8
    def _init_model(self):
        if not self._model_kwargs["agent"].lower() == "acer":
            raise ValueError(
                "The model_kwargs dict has to be created using the ACER agent's arguments as reference. Make sure the correct model parameters are passed."
            )

        del self._model_kwargs["agent"]

        self._callback_checkpoint_kwargs["save_freq"] = int(
            self._callback_checkpoint_kwargs["save_freq"] / self._num_cpus)

        if self._env_kwargs["extractor"] == "mlp":
            self._model = ACER(CustomMlpPolicy, self._env,
                               **self._model_kwargs)
        else:
            self._model = ACER(CustomCnnPolicy, self._env,
                               **self._model_kwargs)
Example No. 9
def get_existing_model(model_path):

    print('--- Training from existing model', model_path, '---')

    # Load model
    model = ACER.load(model_path)

    return model
Example No. 10
def loader(algo, env_name):
    if algo == 'dqn':
        return DQN.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'ppo2':
        return PPO2.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'a2c':
        return A2C.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'acer':
        return ACER.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'trpo':
        return TRPO.load("trained_agents/" + algo + "/" + env_name + ".pkl")
Example No. 11
def train():
    """Trains an ACER policy """
    env = create_env()

    model = ACER(policy=CnnPolicy,
                 env=env,
                 gamma=0.99,
                 n_steps=20,
                 num_procs=4,
                 q_coef=0.5,
                 ent_coef=0.01,
                 max_grad_norm=10,
                 learning_rate=0.0007,
                 lr_schedule='linear',
                 rprop_alpha=0.99,
                 rprop_epsilon=1e-05,
                 buffer_size=5000,
                 replay_ratio=4,
                 replay_start=1000,
                 correction_term=10.0,
                 trust_region=True,
                 alpha=0.99,
                 delta=1,
                 verbose=1,
                 tensorboard_log="./tb")

    model.learn(total_timesteps=int(1e7),
                callback=callback,
                tb_log_name="acer")

    model.save("models/pacman_acer.pkl")
Example No. 12
def train_acer(seed):
    """
    test ACER on the uav_env(cartesian,discrete)
    :param seed: random seed
    :return: evaluation
    """
    """
    ACER(policy, env, gamma=0.99, n_steps=20, num_procs=1, q_coef=0.5, ent_coef=0.01,
    max_grad_norm=10, learning_rate=0.0007, lr_schedule='linear', rprop_alpha=0.99,
    rprop_epsilon=1e-05, buffer_size=5000, replay_ratio=4, replay_start=1000, 
    correction_term=10.0, trust_region=True, alpha=0.99, delta=1, verbose=0, 
    tensorboard_log=None, _init_setup_model=True)
    """
    algo = 'ACER'
    num_timesteps = 3000000

    env = set_up_env(seed)

    global best_mean_reward, n_steps
    best_mean_reward, n_steps = -np.inf, 0

    model = ACER(policy=MlpPolicy, env=env, gamma=0.99, n_steps=20, num_procs=1,
                 q_coef=0.5, ent_coef=0.01, max_grad_norm=10, learning_rate=0.0007,
                 lr_schedule='linear', rprop_alpha=0.99, rprop_epsilon=1e-05,
                 buffer_size=5000, replay_ratio=4, replay_start=1000,
                 correction_term=10.0, trust_region=True, alpha=0.99, delta=1,
                 verbose=0, tensorboard_log="./logs/{}/tensorboard/{}/".format(EXPERIMENT_NATURE, algo))

    model.learn(total_timesteps=num_timesteps, callback=callback, seed=seed,
                log_interval=500, tb_log_name="seed_{}".format(seed))

    model = ACER.load(log_dir + 'best_model.pkl')

    evaluation = evaluate_model(env, model, 100)
    os.makedirs('./logs/{}/csv/{}/'.format(EXPERIMENT_NATURE, algo), exist_ok=True)
    os.rename('/tmp/gym/monitor.csv', "./logs/{}/csv/{}/seed_{}.csv".format(EXPERIMENT_NATURE, algo, seed))
    env.close()
    del model, env
    gc.collect()
    return evaluation
Example No. 13
def train_ACER(env_train, model_name, timesteps=25000):
    start = time.time()
    model = ACER('MlpPolicy', env_train, verbose=0)
    model.learn(total_timesteps=timesteps)
    end = time.time()

    model.save(f"{config.TRAINED_MODEL_DIR}/{model_name}")
    print('Training time (ACER): ', (end - start) / 60, ' minutes')
    return model
Example No. 14
def load_model(path: str, algorithm: str):
    from stable_baselines import PPO2, DQN, A2C, ACER, GAIL, TRPO
    if algorithm == 'PPO2':
        return PPO2.load(path)
    if algorithm == 'DQN':
        return DQN.load(path)
    if algorithm == 'A2C':
        return A2C.load(path)
    if algorithm == 'ACER':
        return ACER.load(path)
    if algorithm == 'GAIL':
        return GAIL.load(path)
    if algorithm == 'TRPO':
        return TRPO.load(path)
    return None
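
A hedged usage sketch of the dispatcher above; the path is a placeholder. Note that the function falls through to None for an unrecognized algorithm name.

# Hypothetical usage; the path below is a placeholder.
model = load_model('trained_agents/acer/CartPole-v1.pkl', 'ACER')
if model is None:
    raise ValueError('Unsupported algorithm')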
Example No. 15
def train_acer(timesteps, name):
    env = datares_roulette
    env = DummyVecEnv([env])
    model = ACER(
        stable_baselines.common.policies.MlpPolicy,
        env,
        verbose=1,
    )
    model.learn(total_timesteps=timesteps)
    model.save(name)
    return model
Example No. 16
def setup_game():
    playing = True
    while (playing):
        games = input("Do you want to play 5, 10 or 20 games? ")
        if (games.replace(" ", "") == "1"):
            games = 1
            playing = False
        elif (games.replace(" ", "") == "5"):
            games = 5
            playing = False
        elif (games.replace(" ", "") == "10"):
            games = 10
            playing = False
        elif (games.replace(" ", "") == "20"):
            games = 20
            playing = False
        else:
            print("Unrecognized please try again!")

    playing = True
    while (playing):
        AIagent = input(
            "Do you want to play against PPO2(p)(1), A2C(a)(2) or ACER(c)(3) agent?"
        )
        if (AIagent.replace(" ", "").upper() == "p".upper()
                or AIagent.replace(" ", "").upper() == "ppo2".upper()
                or AIagent.replace(" ", "") == "1"):
            AIagent = PPO2.load("models/PPO2-qiscoin-v1-10k")
            ai_name = "PPO2"
            playing = False
        elif (AIagent.replace(" ", "").upper() == "a".upper()
              or AIagent.replace(" ", "").upper() == "a2c".upper()
              or AIagent.replace(" ", "") == "2"):
            AIagent = A2C.load("models/A2C-qiscoin-v1-10k")
            ai_name = "A2C"
            playing = False
        elif (AIagent.replace(" ", "").upper() == "c".upper()
              or AIagent.replace(" ", "").upper() == "acer".upper()
              or AIagent.replace(" ", "") == "3"):
            AIagent = ACER.load("models/ACER-qiscoin-v1-10k")
            ai_name = "ACER"
            playing = False
        else:
            print("Unrecognized please try again!")
    return games, AIagent, ai_name
Example No. 17
def record_video():
    """Record of a video for an trained ACER agent"""
    model = ACER.load("models/pacman_acer.pkl", verbose=1)
    env = create_env()
    model.set_env(env)

    video_length = 3000
    env = wrap_video_env(env,
                         name="pacman_acer",
                         video_length=video_length,
                         path='videos/')

    state = env.reset()
    for _ in range(video_length + 1):
        action, _states = model.predict(state)
        state, _, _, _ = env.step(action)
    print("Video recorded")
    env.close()
Example No. 18
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.vec_env import VecFrameStack
from stable_baselines import ACER

# There already exists an environment generator
# that will make and wrap atari environments correctly.
# Here we are also multiprocessing training (num_env=4 => 4 processes)
env = make_atari_env('PongNoFrameskip-v4', num_env=4, seed=0)
# Frame-stacking with 4 frames
env = VecFrameStack(env, n_stack=4)

model = ACER('CnnPolicy', env, verbose=1)
model.learn(total_timesteps=25000)

# save
model.save("cnn_pong")

obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
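
The saved agent can later be restored without retraining; a minimal sketch, assuming the same frame-stacked environment setup as above (the 1000-step evaluation length is arbitrary).

# Hypothetical reload of the model saved above.
model = ACER.load("cnn_pong")
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)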
Example No. 19
        env = CarEnv(out_dir, n_stacks=5, a_space_type=args.action)
        env.next_weather()
        env = Monitor(env, out_dir)

        print("==========Creating model------------------")
        policy = CnnPolicy
        if args.model == 'trpo':
            model = TRPO(policy,
                         env,
                         verbose=1,
                         timesteps_per_batch=64,
                         tensorboard_log=out_dir)
        elif args.model == 'acer':
            model = ACER(policy,
                         env,
                         verbose=1,
                         n_steps=64,
                         tensorboard_log=out_dir)
        elif args.model == 'ppo':
            model = PPO2(policy,
                         env,
                         verbose=1,
                         n_steps=64,
                         tensorboard_log=out_dir)
        elif args.model == 'acktr':
            model = ACKTR(policy,
                          env,
                          n_steps=4,
                          verbose=1,
                          tensorboard_log=out_dir)
        elif args.model == 'ddpg':
Example No. 20
class LoadRLModel(IStrategy):
    stoploss = -0.50

    trailing_stop = False

    ticker_interval = '5m'

    # Run "populate_indicators()" only for new candle.
    process_only_new_candles = False

    startup_candle_count: int = 20

    model = ACER.load('model')

    def informative_pairs(self):
        return []

    def populate_indicators(self, dataframe: DataFrame,
                            metadata: dict) -> DataFrame:
        # Momentum Indicators
        # ------------------------------------

        # ADX
        dataframe['adx'] = ta.ADX(dataframe)

        # Plus Directional Indicator / Movement
        dataframe['plus_dm'] = ta.PLUS_DM(dataframe)
        dataframe['plus_di'] = ta.PLUS_DI(dataframe)

        # # Minus Directional Indicator / Movement
        dataframe['minus_dm'] = ta.MINUS_DM(dataframe)
        dataframe['minus_di'] = ta.MINUS_DI(dataframe)

        # Aroon, Aroon Oscillator
        aroon = ta.AROON(dataframe)
        dataframe['aroonup'] = aroon['aroonup']
        dataframe['aroondown'] = aroon['aroondown']
        dataframe['aroonosc'] = ta.AROONOSC(dataframe)

        # Awesome Oscillator
        dataframe['ao'] = qtpylib.awesome_oscillator(dataframe)

        # # Keltner Channel
        # keltner = qtpylib.keltner_channel(dataframe)
        # dataframe["kc_upperband"] = keltner["upper"]
        # dataframe["kc_lowerband"] = keltner["lower"]
        # dataframe["kc_middleband"] = keltner["mid"]
        # dataframe["kc_percent"] = (
        #     (dataframe["close"] - dataframe["kc_lowerband"]) /
        #     (dataframe["kc_upperband"] - dataframe["kc_lowerband"])
        # )
        # dataframe["kc_width"] = (
        #     (dataframe["kc_upperband"] - dataframe["kc_lowerband"]) / dataframe["kc_middleband"]
        # )

        # Ultimate Oscillator
        dataframe['uo'] = ta.ULTOSC(dataframe)

        # Commodity Channel Index: values [Oversold:-100, Overbought:100]
        dataframe['cci'] = ta.CCI(dataframe)

        # RSI
        dataframe['rsi'] = ta.RSI(dataframe)

        # Inverse Fisher transform on RSI: values [-1.0, 1.0] (https://goo.gl/2JGGoy)
        rsi = 0.1 * (dataframe['rsi'] - 50)
        dataframe['fisher_rsi'] = (np.exp(2 * rsi) - 1) / (np.exp(2 * rsi) + 1)

        # Inverse Fisher transform on RSI normalized: values [0.0, 100.0] (https://goo.gl/2JGGoy)
        dataframe['fisher_rsi_norma'] = 50 * (dataframe['fisher_rsi'] + 1)

        # Stochastic Slow
        stoch = ta.STOCH(dataframe)
        dataframe['slowd'] = stoch['slowd']
        dataframe['slowk'] = stoch['slowk']

        # Stochastic Fast
        stoch_fast = ta.STOCHF(dataframe)
        dataframe['fastd'] = stoch_fast['fastd']
        dataframe['fastk'] = stoch_fast['fastk']

        # Stochastic RSI
        stoch_rsi = ta.STOCHRSI(dataframe)
        dataframe['fastd_rsi'] = stoch_rsi['fastd']
        dataframe['fastk_rsi'] = stoch_rsi['fastk']

        # MACD
        macd = ta.MACD(dataframe)
        dataframe['macd'] = macd['macd']
        dataframe['macdsignal'] = macd['macdsignal']
        dataframe['macdhist'] = macd['macdhist']

        # MFI
        dataframe['mfi'] = ta.MFI(dataframe)

        # # ROC
        dataframe['roc'] = ta.ROC(dataframe)

        # Overlap Studies
        # ------------------------------------

        # # Bollinger Bands
        # bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(dataframe), window=20, stds=2)
        # dataframe['bb_lowerband'] = bollinger['lower']
        # dataframe['bb_middleband'] = bollinger['mid']
        # dataframe['bb_upperband'] = bollinger['upper']
        # dataframe["bb_percent"] = (
        #     (dataframe["close"] - dataframe["bb_lowerband"]) /
        #     (dataframe["bb_upperband"] - dataframe["bb_lowerband"])
        # )
        # dataframe["bb_width"] = (
        #     (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["bb_middleband"]
        # )

        # # Bollinger Bands - Weighted (EMA based instead of SMA)
        # weighted_bollinger = qtpylib.weighted_bollinger_bands(
        #     qtpylib.typical_price(dataframe), window=20, stds=2
        # )
        # dataframe["wbb_upperband"] = weighted_bollinger["upper"]
        # dataframe["wbb_lowerband"] = weighted_bollinger["lower"]
        # dataframe["wbb_middleband"] = weighted_bollinger["mid"]
        # dataframe["wbb_percent"] = (
        #     (dataframe["close"] - dataframe["wbb_lowerband"]) /
        #     (dataframe["wbb_upperband"] - dataframe["wbb_lowerband"])
        # )
        # dataframe["wbb_width"] = (
        #     (dataframe["wbb_upperband"] - dataframe["wbb_lowerband"]) /
        #     dataframe["wbb_middleband"]
        # )

        # # EMA - Exponential Moving Average
        # dataframe['ema3'] = ta.EMA(dataframe, timeperiod=3)
        # dataframe['ema5'] = ta.EMA(dataframe, timeperiod=5)
        # dataframe['ema10'] = ta.EMA(dataframe, timeperiod=10)
        # dataframe['ema21'] = ta.EMA(dataframe, timeperiod=21)
        # dataframe['ema50'] = ta.EMA(dataframe, timeperiod=50)
        # dataframe['ema100'] = ta.EMA(dataframe, timeperiod=100)

        # # SMA - Simple Moving Average
        # dataframe['sma3'] = ta.SMA(dataframe, timeperiod=3)
        # dataframe['sma5'] = ta.SMA(dataframe, timeperiod=5)
        # dataframe['sma10'] = ta.SMA(dataframe, timeperiod=10)
        # dataframe['sma21'] = ta.SMA(dataframe, timeperiod=21)
        # dataframe['sma50'] = ta.SMA(dataframe, timeperiod=50)
        # dataframe['sma100'] = ta.SMA(dataframe, timeperiod=100)

        # Parabolic SAR
        # dataframe['sar'] = ta.SAR(dataframe)

        # TEMA - Triple Exponential Moving Average
        # dataframe['tema'] = ta.TEMA(dataframe, timeperiod=9)

        # # Cycle Indicator
        # # ------------------------------------
        # # Hilbert Transform Indicator - SineWave
        # hilbert = ta.HT_SINE(dataframe)
        # dataframe['htsine'] = hilbert['sine']
        # dataframe['htleadsine'] = hilbert['leadsine']

        # # Pattern Recognition - Bullish candlestick patterns
        # # ------------------------------------
        # # Hammer: values [0, 100]
        # dataframe['CDLHAMMER'] = ta.CDLHAMMER(dataframe)
        # # Inverted Hammer: values [0, 100]
        # dataframe['CDLINVERTEDHAMMER'] = ta.CDLINVERTEDHAMMER(dataframe)
        # # Dragonfly Doji: values [0, 100]
        # dataframe['CDLDRAGONFLYDOJI'] = ta.CDLDRAGONFLYDOJI(dataframe)
        # # Piercing Line: values [0, 100]
        # dataframe['CDLPIERCING'] = ta.CDLPIERCING(dataframe) # values [0, 100]
        # # Morningstar: values [0, 100]
        # dataframe['CDLMORNINGSTAR'] = ta.CDLMORNINGSTAR(dataframe) # values [0, 100]
        # # Three White Soldiers: values [0, 100]
        # dataframe['CDL3WHITESOLDIERS'] = ta.CDL3WHITESOLDIERS(dataframe) # values [0, 100]

        # # Pattern Recognition - Bearish candlestick patterns
        # # ------------------------------------
        # # Hanging Man: values [0, 100]
        # dataframe['CDLHANGINGMAN'] = ta.CDLHANGINGMAN(dataframe)
        # # Shooting Star: values [0, 100]
        # dataframe['CDLSHOOTINGSTAR'] = ta.CDLSHOOTINGSTAR(dataframe)
        # # Gravestone Doji: values [0, 100]
        # dataframe['CDLGRAVESTONEDOJI'] = ta.CDLGRAVESTONEDOJI(dataframe)
        # # Dark Cloud Cover: values [0, 100]
        # dataframe['CDLDARKCLOUDCOVER'] = ta.CDLDARKCLOUDCOVER(dataframe)
        # # Evening Doji Star: values [0, 100]
        # dataframe['CDLEVENINGDOJISTAR'] = ta.CDLEVENINGDOJISTAR(dataframe)
        # # Evening Star: values [0, 100]
        # dataframe['CDLEVENINGSTAR'] = ta.CDLEVENINGSTAR(dataframe)

        # # Pattern Recognition - Bullish/Bearish candlestick patterns
        # # ------------------------------------
        # # Three Line Strike: values [0, -100, 100]
        # dataframe['CDL3LINESTRIKE'] = ta.CDL3LINESTRIKE(dataframe)
        # # Spinning Top: values [0, -100, 100]
        # dataframe['CDLSPINNINGTOP'] = ta.CDLSPINNINGTOP(dataframe) # values [0, -100, 100]
        # # Engulfing: values [0, -100, 100]
        # dataframe['CDLENGULFING'] = ta.CDLENGULFING(dataframe) # values [0, -100, 100]
        # # Harami: values [0, -100, 100]
        # dataframe['CDLHARAMI'] = ta.CDLHARAMI(dataframe) # values [0, -100, 100]
        # # Three Outside Up/Down: values [0, -100, 100]
        # dataframe['CDL3OUTSIDE'] = ta.CDL3OUTSIDE(dataframe) # values [0, -100, 100]
        # # Three Inside Up/Down: values [0, -100, 100]
        # dataframe['CDL3INSIDE'] = ta.CDL3INSIDE(dataframe) # values [0, -100, 100]

        # # Chart type
        # # ------------------------------------
        # # Heikin Ashi Strategy
        # heikinashi = qtpylib.heikinashi(dataframe)
        # dataframe['ha_open'] = heikinashi['open']
        # dataframe['ha_close'] = heikinashi['close']
        # dataframe['ha_high'] = heikinashi['high']
        # dataframe['ha_low'] = heikinashi['low']

        # Retrieve best bid and best ask from the orderbook
        # ------------------------------------
        """
        # first check if dataprovider is available
        if self.dp:
            if self.dp.runmode in ('live', 'dry_run'):
                ob = self.dp.orderbook(metadata['pair'], 1)
                dataframe['best_bid'] = ob['bids'][0][0]
                dataframe['best_ask'] = ob['asks'][0][0]
        """

        return dataframe

    def populate_buy_trend(self, dataframe: DataFrame,
                           metadata: dict) -> DataFrame:
        """
        Based on TA indicators, populates the buy signal for the given dataframe
        :param dataframe: DataFrame populated with indicators
        :param metadata: Additional information, like the currently traded pair
        :return: DataFrame with buy column
        """
        # dataframe.loc[
        #     (
        #         (qtpylib.crossed_above(dataframe['rsi'], 30)) &  # Signal: RSI crosses above 30
        #         (dataframe['tema'] <= dataframe['bb_middleband']) &  # Guard: tema below BB middle
        #         (dataframe['tema'] > dataframe['tema'].shift(1)) &  # Guard: tema is raising
        #         (dataframe['volume'] > 0)  # Make sure Volume is not 0
        #     ),
        #     'buy'] = 1
        action, nan_list = self.rl_model_redict(dataframe)
        dataframe.loc[action == 1, 'buy'] = 1
        dataframe.loc[nan_list == True, 'buy'] = 0
        return dataframe

    def populate_sell_trend(self, dataframe: DataFrame,
                            metadata: dict) -> DataFrame:
        """
        Based on TA indicators, populates the sell signal for the given dataframe
        :param dataframe: DataFrame populated with indicators
        :param metadata: Additional information, like the currently traded pair
        :return: DataFrame with buy column
        """
        # dataframe.loc[
        #     (
        #         (qtpylib.crossed_above(dataframe['rsi'], 70)) &  # Signal: RSI crosses above 70
        #         (dataframe['tema'] > dataframe['bb_middleband']) &  # Guard: tema above BB middle
        #         (dataframe['tema'] < dataframe['tema'].shift(1)) &  # Guard: tema is falling
        #         (dataframe['volume'] > 0)  # Make sure Volume is not 0
        #     ),
        #     'sell'] = 1
        action, nan_list = self.rl_model_redict(dataframe)
        dataframe.loc[action == 2, 'sell'] = 1
        dataframe.loc[nan_list == True, 'sell'] = 0
        return dataframe

    def rl_model_redict(self, dataframe):
        data = np.array(
            [
                dataframe['adx'],
                dataframe['plus_dm'],
                dataframe['plus_di'],
                dataframe['minus_dm'],
                dataframe['minus_di'],
                dataframe['aroonup'],
                dataframe['aroondown'],
                dataframe['aroonosc'],
                dataframe['ao'],
                # dataframe['kc_percent'],
                # dataframe['kc_width'],
                dataframe['uo'],
                dataframe['cci'],
                dataframe['rsi'],
                dataframe['fisher_rsi'],
                dataframe['slowd'],
                dataframe['slowk'],
                dataframe['fastd'],
                dataframe['fastk'],
                dataframe['fastd_rsi'],
                dataframe['fastk_rsi'],
                dataframe['macd'],
                dataframe['macdsignal'],
                dataframe['macdhist'],
                dataframe['mfi'],
                dataframe['roc'],
                # row['bb_percent'],
                # row['bb_width'],
                # row['wbb_percent'],
                # row['wbb_width'],
                # dataframe['htsine'],
                # dataframe['htleadsine'],
                # row['CDLHAMMER'],
                # row['CDLINVERTEDHAMMER'],
                # row['CDLDRAGONFLYDOJI'],
                # row['CDLPIERCING'],
                # row['CDLMORNINGSTAR'],
                # row['CDL3WHITESOLDIERS'],
                # row['CDLHANGINGMAN'],
                # row['CDLSHOOTINGSTAR'],
                # row['CDLGRAVESTONEDOJI'],
                # row['CDLDARKCLOUDCOVER'],
                # row['CDLEVENINGDOJISTAR'],
                # row['CDLEVENINGSTAR'],
                # row['CDL3LINESTRIKE'],
                # row['CDLSPINNINGTOP'],
                # row['CDLENGULFING'],
                # row['CDLHARAMI'],
                # row['CDL3OUTSIDE'],
                # row['CDL3INSIDE'],
                # trad_status,
                # (self.trade != None)
            ],
            dtype=np.float)

        data = data.reshape(-1, 24)

        nan_list = np.isnan(data).any(axis=1)
        data = np.nan_to_num(data)
        action, _ = self.model.predict(data, deterministic=True)

        return action, nan_list
Example No. 21
class ACERAgent(Agent):
    def __init__(
        self,
        model_name="model_name",
        save_dir="./models",
        log_interval=1e4,
        num_cpus=8,
        eval_episodes=1000,
        n_steps=1e6,
        layer_normalization=False,
        model_kwargs={"tensorboard_log": "./tensorboards/"},
        env_kwargs={
            "board_size": 4,
            "binary": True,
            "extractor": "cnn"
        },
        callback_checkpoint_kwargs={
            "save_freq": 0,
            "save_path": "./models/",
            "name_prefix": "model_name"
        },
        callback_hist_kwargs={"hist_freq": 0},
    ):
        super().__init__(
            model_name,
            save_dir,
            num_cpus,
            model_kwargs,
            env_kwargs,
            layer_normalization,
            callback_checkpoint_kwargs,
            callback_hist_kwargs,
            n_steps,
            log_interval,
            eval_episodes,
        )
        self._init_model()

    def _init_model(self):
        if not self._model_kwargs["agent"].lower() == "acer":
            raise ValueError(
                "The model_kwargs dict has to be created using the ACER agent's arguments as reference. Make sure the correct model parameters are passed."
            )

        del self._model_kwargs["agent"]

        self._callback_checkpoint_kwargs["save_freq"] = int(
            self._callback_checkpoint_kwargs["save_freq"] / self._num_cpus)

        if self._env_kwargs["extractor"] == "mlp":
            self._model = ACER(CustomMlpPolicy, self._env,
                               **self._model_kwargs)
        else:
            self._model = ACER(CustomCnnPolicy, self._env,
                               **self._model_kwargs)

    def train(self):
        "Optimize the model."
        callbacks = []

        # Checkpoint callback
        if self._callback_checkpoint_kwargs["save_freq"] > 0:

            # Append model name into checkpoint save_path
            self._callback_checkpoint_kwargs["save_path"] = (
                self._callback_checkpoint_kwargs["save_path"] + "/" +
                str(self._model_name))
            checkpoint_callback = CheckpointCallback(
                **self._callback_checkpoint_kwargs)
            callbacks.append(checkpoint_callback)

        if self._callback_hist_kwargs["hist_freq"] > 0:
            # hist_callback = CustomCallbackPPO2(**self._callback_hist_kwargs)
            # callbacks.append(hist_callback)
            pass

        try:
            self._model.learn(self._n_steps,
                              log_interval=self._log_interval,
                              callback=callbacks,
                              tb_log_name=self._model_name)
        except KeyboardInterrupt:
            pass

        folder_path = os.path.join(self._save_dir, self._model_name)
        self._model.save(os.path.join(folder_path, self._model_name))

    def test(self):
        "Evaluate the model."

        mean_reward = super()._test(self._model)
        return mean_reward
Example No. 22
#cutoff_s=str(cutoffpenaltyscalar).split('.')[0]
#rg_s=max(str(float(rg_prob)).split('.'))
turnspc_s=str(turnspc).split('.')[1]

scenario=str(f'{trialv}_{inputfile_s}_t{test}_lr{LR_s}_g{gamma_s}')  
savepath='./output/%s' % scenario




for n in range(500):
    turns=round(random.random()*x*y*z*turnspc)
    env = environment(x,y,z,gamma, turnspc, policyname, rg_prob='loadenv')
    
    # Instantiate the agent
    model = ACER(policy, env, gamma=gamma, n_steps=episodetimesteps, learning_rate=LR,  buffer_size=10000,  verbose=1)
    
    # Load the trained agent
    model = ACER.load("%s/best_model" % savepath)
    
    # Evaluate the agent
    #mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
    
    # Enjoy trained agent
    obs = env.reset()
    for i in range(turns):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        #print(action, rewards, dones)
        #env.renderif('on')
        if dones == True:
Example No. 23
env = gym.make(env_name)
env = Monitor(env, agent_dir)


if model_name == 'dqn':
    from stable_baselines.deepq.policies import MlpPolicy
    from stable_baselines import DQN
    
    model = DQN(MlpPolicy, env, verbose=1, exploration_fraction=0.9, exploration_final_eps=0.5)
    model.learn(total_timesteps=1000000, log_interval=1)
    model.save(agent_dir + 'agent')

elif model_name == 'ppo1':
    from stable_baselines.common.policies import MlpPolicy
    from stable_baselines import PPO1

    model = PPO1(MlpPolicy, env, verbose=1)
    model.learn(total_timesteps=1000000, log_interval=1)
    model.save(agent_dir + 'agent')

elif model_name == 'acer':
    from stable_baselines.common.policies import MlpPolicy
    from stable_baselines import ACER 
    model = ACER(MlpPolicy, env, verbose=1)
    model.learn(total_timesteps=1000000, log_interval=1)
    model.save(agent_dir + 'agent')
 
else:
    print('Usage: python training.py <env> <model> <agent_name>')
Example No. 24
        '8': 'data/fcms/gauss/8x0_5000.pkl'}
ep_lengths = {'6': 30,
              '8': 40}
runs_per_env = 1
collected_data = {'vars': [],
                  'env': [],
                  'run': [],
                  'algo': [],
                  'time': []}

run_experiment = True
analyze_experiment = True

if run_experiment:
    for var in vars:
        model = ACER.load(model_paths[var])
        envs = FCMGenerator.load_dataset(data[var])[:500]
        env_counter = 0
        print(var+' var environments')
        bar = tqdm(total=len(envs)*len(algos)*runs_per_env)
        for env in envs:
            # create fcm environment for our algo
            fcm_env = FCMEnvironment(agent=DiscreteAgent(int(var), env_type='Gauss'),
                                     fcm=env,
                                     eval_func=NoEval())
            # collect obs data for notears and GES
            obs_data = DataFrame(columns=['X'+str(i) for i in range(int(var))])
            for i in range(1000):
                inst = env.get_next_instantiation()[0]
                obs_data = obs_data.append({'X' + str(i): float(inst[i]) for i in range(len(inst))},
                                           ignore_index=True)
Example No. 25
def test_acer(name):
    model_path = os.path.join('models', name)
    model = ACER.load(model_path)
    return model
Example No. 26
import pytest

from stable_baselines import A2C, ACER, ACKTR, DeepQ, DDPG, PPO1, PPO2, TRPO
from stable_baselines.ddpg import AdaptiveParamNoiseSpec
from stable_baselines.common.identity_env import IdentityEnv, IdentityEnvBox
from stable_baselines.common.vec_env import DummyVecEnv

PARAM_NOISE_DDPG = AdaptiveParamNoiseSpec(initial_stddev=float(0.2),
                                          desired_action_stddev=float(0.2))

# Hyperparameters for learning identity for each RL model
LEARN_FUNC_DICT = {
    'a2c':
    lambda e: A2C(policy="MlpPolicy", env=e).learn(total_timesteps=1000),
    'acer':
    lambda e: ACER(policy="MlpPolicy", env=e).learn(total_timesteps=1000),
    'acktr':
    lambda e: ACKTR(policy="MlpPolicy", env=e).learn(total_timesteps=1000),
    'deepq':
    lambda e: DeepQ(policy="MlpPolicy", env=e).learn(total_timesteps=1000),
    'ddpg':
    lambda e: DDPG(policy="MlpPolicy", env=e, param_noise=PARAM_NOISE_DDPG).
    learn(total_timesteps=1000),
    'ppo1':
    lambda e: PPO1(policy="MlpPolicy", env=e).learn(total_timesteps=1000),
    'ppo2':
    lambda e: PPO2(policy="MlpPolicy", env=e).learn(total_timesteps=1000),
    'trpo':
    lambda e: TRPO(policy="MlpPolicy", env=e).learn(total_timesteps=1000),
}
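
A hedged sketch of how such a dictionary is typically consumed in a parametrized identity test; the environment dimension and the subset of model names are illustrative.

# Hypothetical parametrized test consuming LEARN_FUNC_DICT.
@pytest.mark.parametrize("model_name", ['a2c', 'acer', 'ppo2'])
def test_identity_learn(model_name):
    env = DummyVecEnv([lambda: IdentityEnv(10)])
    model = LEARN_FUNC_DICT[model_name](env)
    obs = env.reset()
    action, _ = model.predict(obs)
    env.close()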
Example No. 27
import pytest
import numpy as np

from stable_baselines import A2C, ACER, ACKTR, DQN, DDPG, SAC, PPO1, PPO2, TD3, TRPO
from stable_baselines.ddpg import NormalActionNoise
from stable_baselines.common.identity_env import IdentityEnv, IdentityEnvBox
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.common.evaluation import evaluate_policy


# Hyperparameters for learning identity for each RL model
LEARN_FUNC_DICT = {
    'a2c': lambda e: A2C(policy="MlpPolicy", learning_rate=1e-3, n_steps=1,
                         gamma=0.7, env=e, seed=0).learn(total_timesteps=10000),
    'acer': lambda e: ACER(policy="MlpPolicy", env=e, seed=0,
                           n_steps=1, replay_ratio=1).learn(total_timesteps=15000),
    'acktr': lambda e: ACKTR(policy="MlpPolicy", env=e, seed=0,
                             learning_rate=5e-4, n_steps=1).learn(total_timesteps=20000),
    'dqn': lambda e: DQN(policy="MlpPolicy", batch_size=16, gamma=0.1,
                         exploration_fraction=0.001, env=e, seed=0).learn(total_timesteps=40000),
    'ppo1': lambda e: PPO1(policy="MlpPolicy", env=e, seed=0, lam=0.5,
                           optim_batchsize=16, optim_stepsize=1e-3).learn(total_timesteps=15000),
    'ppo2': lambda e: PPO2(policy="MlpPolicy", env=e, seed=0,
                           learning_rate=1.5e-3, lam=0.8).learn(total_timesteps=20000),
    'trpo': lambda e: TRPO(policy="MlpPolicy", env=e, seed=0,
                           max_kl=0.05, lam=0.7).learn(total_timesteps=10000),
}


@pytest.mark.slow
@pytest.mark.parametrize("model_name", ['a2c', 'acer', 'acktr', 'dqn', 'ppo1', 'ppo2', 'trpo'])
Example No. 28
import gym
from stable_baselines import ACER
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.vec_env import DummyVecEnv

# trying to get an idea of how quickly my computer can train this
pong_env = gym.make('Pong-v0')
pong_env = DummyVecEnv([lambda: pong_env])
pong_model_acer = ACER(
    CnnPolicy,
    pong_env,
    verbose=0,
    tensorboard_log="./../../data/baselines-stuff/pong/acer_pong_tensorboard/")
pong_model_acer.learn(total_timesteps=50_000_000,
                      tb_log_name="run-1-50_000_000")

# since I know I'll be stopping it early
pong_model_acer.save(
    './../../data/baselines-stuff/pong/terrible_pong_model_acer')
Example No. 29
                     eval_freq=50000,
                     deterministic=True,
                     best_model_save_path=evpath),
        EvalCallback(env1,
                     log_path=savepath,
                     n_eval_episodes=20,
                     eval_freq=10000,
                     deterministic=False,
                     best_model_save_path=savepath)
    ])
    if (os.path.exists("%s/final_model.zip" % savepath)):
        # Instantiate the agent
        model = ACER(policy,
                     env,
                     gamma=gamma,
                     n_steps=episodetimesteps,
                     learning_rate=LR,
                     buffer_size=5000,
                     verbose=1,
                     n_cpu_tf_sess=num_cpu)
        # Load the trained agent
        model = ACER.load("%s/final_model" % savepath, env=env)
        print('loaded agent')
        save_evals()
        model.learn(
            total_timesteps=episodetimesteps**50, callback=callbacklist
        )  # total timesteps set to a very large number so the program terminates based on the runtime parameter

    else:
        #create model with Stable Baselines package.
        model = ACER(policy,
                     env,
Example No. 30
turnspc_s = str(turnspc).split('.')[1]

scenario = str(
    f'{trialv}_{inputfile_s}_t{test}_lr{LR_s}_g{gamma_s}')  #_cpu{ncpu}
savepath = './output/%s/%s' % (scenario, 'eval')

turns = round(x * y * z * turnspc)

env = environment(x, y, z, cutoff, turnspc, policyname, rg_prob='loadenv')

if test == 'CNNACER' or test == 'MLPACER':

    # Instantiate the agent
    model = ACER(policy,
                 env,
                 gamma=gamma,
                 learning_rate=LR,
                 n_steps=episodetimesteps,
                 verbose=1)
    #model = DQN('MlpPolicy', env, learning_rate=LR, prioritized_replay=True, verbose=1)
    #
    # Load the trained agent
    model = ACER.load("%s/best_model" % savepath)
    #model = DQN.load("%s/best_model" % savepath)
    print('loaded agent %s' % savepath)

else:
    # Instantiate the agent
    model = A2C(policy,
                env,
                gamma=gamma,
                learning_rate=LR,