Code Example #1
File: Prediction.py  Project: karelbertrands/RLeab
import numpy as np
from stable_baselines import A2C, ACER, ACKTR, PPO2


def NewPotential(current_window, algorithm='PPO'):

    # Determine the pretrained agent
    if algorithm == 'A2C':
        model = A2C.load("pretrained_A2C")
    elif algorithm == 'PPO':
        model = PPO2.load("pretrained_PPO")
    elif algorithm == 'ACKTR':
        model = ACKTR.load("pretrained_ACKTR")
    elif algorithm == 'ACER':
        model = ACER.load("pretrained_ACER")
    else:
        raise ValueError("%s is not a valid algorithm." % algorithm)

    if len(current_window) != model.observation_space.shape[0]:
        raise ValueError("%s is does not match the model's window size." %
                         len(current_window))

    action, _states = model.predict(current_window, deterministic=False)

    voltages = np.linspace(0, 1, num=model.action_space.n)
    if 0 <= action < model.action_space.n:
        voltage = voltages[action]
    else:
        raise ValueError(
            "Received invalid action={} which is not part of the action space".
            format(action))

    return voltage
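
A minimal usage sketch for NewPotential, assuming the pretrained agent files are on disk and the model was trained on a 10-sample observation window (the window length here is a hypothetical value):

import numpy as np

# Hypothetical 10-sample window; its length must match the loaded model's
# observation space, otherwise NewPotential raises a ValueError.
current_window = np.random.uniform(0.0, 1.0, size=10)

voltage = NewPotential(current_window, algorithm='PPO')
print("Suggested voltage:", voltage)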
Code Example #2
def evaluate_policy(model,
                    eval_data,
                    runs_per_env: int,
                    n_vars: int,
                    episode_length: int,
                    display: bool,
                    printing: bool,
                    wrapped_env: bool = False) -> np.ndarray:
    if isinstance(model, str):
        model = ACER.load(model)
    differences = []
    for fcm in eval_data:
        target_graph = CausalGraphGenerator.create_graph_from_fcm(fcm)

        for run in range(runs_per_env):
            predicted_graph = apply_policy(model=model,
                                           test_env=fcm,
                                           n_vars=n_vars,
                                           episode_length=episode_length,
                                           display=display,
                                           env_type='Gauss',
                                           printing=printing,
                                           wrapped_env=wrapped_env)

            difference = directed_shd(predicted_graph, target_graph)
            differences.append(difference)
            print('.')

    differences = np.array(differences)
    return differences
Code Example #3
def get_existing_model(model_path):

    print('--- Training from existing model', model_path, '---')

    # Load model
    model = ACER.load(model_path)

    return model
Code Example #4
def loader(algo, env_name):
    path = "trained_agents/" + algo + "/" + env_name + ".pkl"
    if algo == 'dqn':
        return DQN.load(path)
    elif algo == 'ppo2':
        return PPO2.load(path)
    elif algo == 'a2c':
        return A2C.load(path)
    elif algo == 'acer':
        return ACER.load(path)
    elif algo == 'trpo':
        return TRPO.load(path)
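
A usage sketch for loader, with a hypothetical environment name; note that an unrecognized algorithm string falls through every branch and the function returns None:

# Hypothetical name; expects trained_agents/acer/CartPole-v1.pkl to exist.
model = loader('acer', 'CartPole-v1')
if model is None:
    raise ValueError("Unknown algorithm passed to loader")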
Code Example #5
def load_model(path: str, algorithm: str):
    from stable_baselines import PPO2, DQN, A2C, ACER, GAIL, TRPO
    if algorithm == 'PPO2':
        return PPO2.load(path)
    if algorithm == 'DQN':
        return DQN.load(path)
    if algorithm == 'A2C':
        return A2C.load(path)
    if algorithm == 'ACER':
        return ACER.load(path)
    if algorithm == 'GAIL':
        return GAIL.load(path)
    if algorithm == 'TRPO':
        return TRPO.load(path)
    return None
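
A usage sketch for load_model, with hypothetical path and algorithm values; the function returns None for algorithm names it does not recognize:

# Both the path and the algorithm string are placeholders for illustration.
model = load_model("checkpoints/acer_cartpole.zip", "ACER")
if model is None:
    raise ValueError("Unsupported algorithm name")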
Code Example #6
File: timed_challengeAI.py  Project: avalds/QisCoin
def setup_game():
    playing = True
    while (playing):
        games = input("Do you want to play 5, 10 or 20 games? ")
        if (games.replace(" ", "") == "1"):
            games = 1
            playing = False
        elif (games.replace(" ", "") == "5"):
            games = 5
            playing = False
        elif (games.replace(" ", "") == "10"):
            games = 10
            playing = False
        elif (games.replace(" ", "") == "20"):
            games = 20
            playing = False
        else:
            print("Unrecognized please try again!")

    playing = True
    while (playing):
        AIagent = input(
            "Do you want to play against PPO2(p)(1), A2C(a)(2) or ACER(c)(3) agent?"
        )
        if (AIagent.replace(" ", "").upper() == "p".upper()
                or AIagent.replace(" ", "").upper() == "ppo2".upper()
                or AIagent.replace(" ", "") == "1"):
            AIagent = PPO2.load("models/PPO2-qiscoin-v1-10k")
            ai_name = "PPO2"
            playing = False
        elif (AIagent.replace(" ", "").upper() == "a".upper()
              or AIagent.replace(" ", "").upper() == "a2c".upper()
              or AIagent.replace(" ", "") == "2"):
            AIagent = A2C.load("models/A2C-qiscoin-v1-10k")
            ai_name = "A2C"
            playing = False
        elif (AIagent.replace(" ", "").upper() == "c".upper()
              or AIagent.replace(" ", "").upper() == "acer".upper()
              or AIagent.replace(" ", "") == "3"):
            AIagent = ACER.load("models/ACER-qiscoin-v1-10k")
            ai_name = "ACER"
            playing = False
        else:
            print("Unrecognized please try again!")
    return games, AIagent, ai_name
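
A usage sketch for setup_game, assuming the model files referenced above exist under models/:

games, AIagent, ai_name = setup_game()
print("Playing", games, "games against the", ai_name, "agent")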
Code Example #7
def record_video():
    """Record of a video for an trained ACER agent"""
    model = ACER.load("models/pacman_acer.pkl", verbose=1)
    env = create_env()
    model.set_env(env)

    video_length = 3000
    env = wrap_video_env(env,
                         name="pacman_acer",
                         video_length=video_length,
                         path='videos/')

    state = env.reset()
    for _ in range(video_length + 1):
        action, _states = model.predict(state)
        state, _, _, _ = env.step(action)
    print("Video recorded")
    env.close()
Code Example #8
def train_acer(seed):
    """
    test ACER on the uav_env(cartesian,discrete)
    :param seed: random seed
    :return: evaluation
    """
    """
    ACER(policy, env, gamma=0.99, n_steps=20, num_procs=1, q_coef=0.5, ent_coef=0.01,
    max_grad_norm=10, learning_rate=0.0007, lr_schedule='linear', rprop_alpha=0.99,
    rprop_epsilon=1e-05, buffer_size=5000, replay_ratio=4, replay_start=1000, 
    correction_term=10.0, trust_region=True, alpha=0.99, delta=1, verbose=0, 
    tensorboard_log=None, _init_setup_model=True)
    """
    algo = 'ACER'
    num_timesteps = 3000000

    env = set_up_env(seed)

    global best_mean_reward, n_steps
    best_mean_reward, n_steps = -np.inf, 0

    model = ACER(policy=MlpPolicy, env=env, gamma=0.99, n_steps=20, num_procs=1,
                 q_coef=0.5, ent_coef=0.01, max_grad_norm=10, learning_rate=0.0007,
                 lr_schedule='linear', rprop_alpha=0.99, rprop_epsilon=1e-05,
                 buffer_size=5000, replay_ratio=4, replay_start=1000,
                 correction_term=10.0, trust_region=True, alpha=0.99, delta=1,
                 verbose=0, tensorboard_log="./logs/{}/tensorboard/{}/".format(EXPERIMENT_NATURE, algo))

    model.learn(total_timesteps=num_timesteps, callback=callback, seed=seed,
                log_interval=500, tb_log_name="seed_{}".format(seed))

    model = ACER.load(log_dir + 'best_model.pkl')

    evaluation = evaluate_model(env, model, 100)
    os.makedirs('./logs/{}/csv/{}/'.format(EXPERIMENT_NATURE, algo), exist_ok=True)
    os.rename('/tmp/gym/monitor.csv', "./logs/{}/csv/{}/seed_{}.csv".format(EXPERIMENT_NATURE, algo, seed))
    env.close()
    del model, env
    gc.collect()
    return evaluation
Code Example #9
                    verbose=1,
                    tensorboard_log=out_dir)
    elif args.model == 'sac':
        model = SAC("CnnPolicy", env)
    train(model, env, out_dir)
else:
    #results_plotter.plot_results([log_dir], time_steps, results_plotter.X_TIMESTEPS, "rl")
    path = '{}/best_model.zip'.format(args.eval)
    env = CarEnv(args.eval, cam_idx_list=(0, 3, 4))
    env.next_weather()
    #env = Monitor(env, args.eval)
    #print(env.num_envs)
    if args.model == 'trpo':
        model = TRPO.load(path)
    elif args.model == 'acer':
        model = ACER.load(path)
    elif args.model == 'ppo':
        model = PPO2.load(path)
    elif args.model == 'acktr':
        model = ACKTR.load(path)
    elif args.model == 'ddpg':
        model = DDPG.load(path)
    elif args.model == 'a2c':
        model = A2C.load(path)
    elif args.model == 'sac':
        model = SAC.load(path)
    #mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=5,return_episode_rewards=True)
    #eps_rewards, eps_len = evaluate_policy(model, env, n_eval_episodes=5,return_episode_rewards=True)
    # print(eps_rewards)
    # print(eps_len)
    # print(np.mean(eps_rewards))
Code Example #10
File: LoadRLModel.py  Project: t3ch9/freqtrade-gym
class LoadRLModel(IStrategy):
    stoploss = -0.50

    trailing_stop = False

    ticker_interval = '5m'

    # Run "populate_indicators()" only for new candle.
    process_only_new_candles = False

    startup_candle_count: int = 20

    model = ACER.load('model')

    def informative_pairs(self):
        return []

    def populate_indicators(self, dataframe: DataFrame,
                            metadata: dict) -> DataFrame:
        # Momentum Indicators
        # ------------------------------------

        # ADX
        dataframe['adx'] = ta.ADX(dataframe)

        # Plus Directional Indicator / Movement
        dataframe['plus_dm'] = ta.PLUS_DM(dataframe)
        dataframe['plus_di'] = ta.PLUS_DI(dataframe)

        # # Minus Directional Indicator / Movement
        dataframe['minus_dm'] = ta.MINUS_DM(dataframe)
        dataframe['minus_di'] = ta.MINUS_DI(dataframe)

        # Aroon, Aroon Oscillator
        aroon = ta.AROON(dataframe)
        dataframe['aroonup'] = aroon['aroonup']
        dataframe['aroondown'] = aroon['aroondown']
        dataframe['aroonosc'] = ta.AROONOSC(dataframe)

        # Awesome Oscillator
        dataframe['ao'] = qtpylib.awesome_oscillator(dataframe)

        # # Keltner Channel
        # keltner = qtpylib.keltner_channel(dataframe)
        # dataframe["kc_upperband"] = keltner["upper"]
        # dataframe["kc_lowerband"] = keltner["lower"]
        # dataframe["kc_middleband"] = keltner["mid"]
        # dataframe["kc_percent"] = (
        #     (dataframe["close"] - dataframe["kc_lowerband"]) /
        #     (dataframe["kc_upperband"] - dataframe["kc_lowerband"])
        # )
        # dataframe["kc_width"] = (
        #     (dataframe["kc_upperband"] - dataframe["kc_lowerband"]) / dataframe["kc_middleband"]
        # )

        # Ultimate Oscillator
        dataframe['uo'] = ta.ULTOSC(dataframe)

        # Commodity Channel Index: values [Oversold:-100, Overbought:100]
        dataframe['cci'] = ta.CCI(dataframe)

        # RSI
        dataframe['rsi'] = ta.RSI(dataframe)

        # Inverse Fisher transform on RSI: values [-1.0, 1.0] (https://goo.gl/2JGGoy)
        rsi = 0.1 * (dataframe['rsi'] - 50)
        dataframe['fisher_rsi'] = (np.exp(2 * rsi) - 1) / (np.exp(2 * rsi) + 1)

        # Inverse Fisher transform on RSI normalized: values [0.0, 100.0] (https://goo.gl/2JGGoy)
        dataframe['fisher_rsi_norma'] = 50 * (dataframe['fisher_rsi'] + 1)

        # Stochastic Slow
        stoch = ta.STOCH(dataframe)
        dataframe['slowd'] = stoch['slowd']
        dataframe['slowk'] = stoch['slowk']

        # Stochastic Fast
        stoch_fast = ta.STOCHF(dataframe)
        dataframe['fastd'] = stoch_fast['fastd']
        dataframe['fastk'] = stoch_fast['fastk']

        # Stochastic RSI
        stoch_rsi = ta.STOCHRSI(dataframe)
        dataframe['fastd_rsi'] = stoch_rsi['fastd']
        dataframe['fastk_rsi'] = stoch_rsi['fastk']

        # MACD
        macd = ta.MACD(dataframe)
        dataframe['macd'] = macd['macd']
        dataframe['macdsignal'] = macd['macdsignal']
        dataframe['macdhist'] = macd['macdhist']

        # MFI
        dataframe['mfi'] = ta.MFI(dataframe)

        # # ROC
        dataframe['roc'] = ta.ROC(dataframe)

        # Overlap Studies
        # ------------------------------------

        # # Bollinger Bands
        # bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(dataframe), window=20, stds=2)
        # dataframe['bb_lowerband'] = bollinger['lower']
        # dataframe['bb_middleband'] = bollinger['mid']
        # dataframe['bb_upperband'] = bollinger['upper']
        # dataframe["bb_percent"] = (
        #     (dataframe["close"] - dataframe["bb_lowerband"]) /
        #     (dataframe["bb_upperband"] - dataframe["bb_lowerband"])
        # )
        # dataframe["bb_width"] = (
        #     (dataframe["bb_upperband"] - dataframe["bb_lowerband"]) / dataframe["bb_middleband"]
        # )

        # # Bollinger Bands - Weighted (EMA based instead of SMA)
        # weighted_bollinger = qtpylib.weighted_bollinger_bands(
        #     qtpylib.typical_price(dataframe), window=20, stds=2
        # )
        # dataframe["wbb_upperband"] = weighted_bollinger["upper"]
        # dataframe["wbb_lowerband"] = weighted_bollinger["lower"]
        # dataframe["wbb_middleband"] = weighted_bollinger["mid"]
        # dataframe["wbb_percent"] = (
        #     (dataframe["close"] - dataframe["wbb_lowerband"]) /
        #     (dataframe["wbb_upperband"] - dataframe["wbb_lowerband"])
        # )
        # dataframe["wbb_width"] = (
        #     (dataframe["wbb_upperband"] - dataframe["wbb_lowerband"]) /
        #     dataframe["wbb_middleband"]
        # )

        # # EMA - Exponential Moving Average
        # dataframe['ema3'] = ta.EMA(dataframe, timeperiod=3)
        # dataframe['ema5'] = ta.EMA(dataframe, timeperiod=5)
        # dataframe['ema10'] = ta.EMA(dataframe, timeperiod=10)
        # dataframe['ema21'] = ta.EMA(dataframe, timeperiod=21)
        # dataframe['ema50'] = ta.EMA(dataframe, timeperiod=50)
        # dataframe['ema100'] = ta.EMA(dataframe, timeperiod=100)

        # # SMA - Simple Moving Average
        # dataframe['sma3'] = ta.SMA(dataframe, timeperiod=3)
        # dataframe['sma5'] = ta.SMA(dataframe, timeperiod=5)
        # dataframe['sma10'] = ta.SMA(dataframe, timeperiod=10)
        # dataframe['sma21'] = ta.SMA(dataframe, timeperiod=21)
        # dataframe['sma50'] = ta.SMA(dataframe, timeperiod=50)
        # dataframe['sma100'] = ta.SMA(dataframe, timeperiod=100)

        # Parabolic SAR
        # dataframe['sar'] = ta.SAR(dataframe)

        # TEMA - Triple Exponential Moving Average
        # dataframe['tema'] = ta.TEMA(dataframe, timeperiod=9)

        # # Cycle Indicator
        # # ------------------------------------
        # # Hilbert Transform Indicator - SineWave
        # hilbert = ta.HT_SINE(dataframe)
        # dataframe['htsine'] = hilbert['sine']
        # dataframe['htleadsine'] = hilbert['leadsine']

        # # Pattern Recognition - Bullish candlestick patterns
        # # ------------------------------------
        # # Hammer: values [0, 100]
        # dataframe['CDLHAMMER'] = ta.CDLHAMMER(dataframe)
        # # Inverted Hammer: values [0, 100]
        # dataframe['CDLINVERTEDHAMMER'] = ta.CDLINVERTEDHAMMER(dataframe)
        # # Dragonfly Doji: values [0, 100]
        # dataframe['CDLDRAGONFLYDOJI'] = ta.CDLDRAGONFLYDOJI(dataframe)
        # # Piercing Line: values [0, 100]
        # dataframe['CDLPIERCING'] = ta.CDLPIERCING(dataframe) # values [0, 100]
        # # Morningstar: values [0, 100]
        # dataframe['CDLMORNINGSTAR'] = ta.CDLMORNINGSTAR(dataframe) # values [0, 100]
        # # Three White Soldiers: values [0, 100]
        # dataframe['CDL3WHITESOLDIERS'] = ta.CDL3WHITESOLDIERS(dataframe) # values [0, 100]

        # # Pattern Recognition - Bearish candlestick patterns
        # # ------------------------------------
        # # Hanging Man: values [0, 100]
        # dataframe['CDLHANGINGMAN'] = ta.CDLHANGINGMAN(dataframe)
        # # Shooting Star: values [0, 100]
        # dataframe['CDLSHOOTINGSTAR'] = ta.CDLSHOOTINGSTAR(dataframe)
        # # Gravestone Doji: values [0, 100]
        # dataframe['CDLGRAVESTONEDOJI'] = ta.CDLGRAVESTONEDOJI(dataframe)
        # # Dark Cloud Cover: values [0, 100]
        # dataframe['CDLDARKCLOUDCOVER'] = ta.CDLDARKCLOUDCOVER(dataframe)
        # # Evening Doji Star: values [0, 100]
        # dataframe['CDLEVENINGDOJISTAR'] = ta.CDLEVENINGDOJISTAR(dataframe)
        # # Evening Star: values [0, 100]
        # dataframe['CDLEVENINGSTAR'] = ta.CDLEVENINGSTAR(dataframe)

        # # Pattern Recognition - Bullish/Bearish candlestick patterns
        # # ------------------------------------
        # # Three Line Strike: values [0, -100, 100]
        # dataframe['CDL3LINESTRIKE'] = ta.CDL3LINESTRIKE(dataframe)
        # # Spinning Top: values [0, -100, 100]
        # dataframe['CDLSPINNINGTOP'] = ta.CDLSPINNINGTOP(dataframe) # values [0, -100, 100]
        # # Engulfing: values [0, -100, 100]
        # dataframe['CDLENGULFING'] = ta.CDLENGULFING(dataframe) # values [0, -100, 100]
        # # Harami: values [0, -100, 100]
        # dataframe['CDLHARAMI'] = ta.CDLHARAMI(dataframe) # values [0, -100, 100]
        # # Three Outside Up/Down: values [0, -100, 100]
        # dataframe['CDL3OUTSIDE'] = ta.CDL3OUTSIDE(dataframe) # values [0, -100, 100]
        # # Three Inside Up/Down: values [0, -100, 100]
        # dataframe['CDL3INSIDE'] = ta.CDL3INSIDE(dataframe) # values [0, -100, 100]

        # # Chart type
        # # ------------------------------------
        # # Heikin Ashi Strategy
        # heikinashi = qtpylib.heikinashi(dataframe)
        # dataframe['ha_open'] = heikinashi['open']
        # dataframe['ha_close'] = heikinashi['close']
        # dataframe['ha_high'] = heikinashi['high']
        # dataframe['ha_low'] = heikinashi['low']

        # Retrieve best bid and best ask from the orderbook
        # ------------------------------------
        """
        # first check if dataprovider is available
        if self.dp:
            if self.dp.runmode in ('live', 'dry_run'):
                ob = self.dp.orderbook(metadata['pair'], 1)
                dataframe['best_bid'] = ob['bids'][0][0]
                dataframe['best_ask'] = ob['asks'][0][0]
        """

        return dataframe

    def populate_buy_trend(self, dataframe: DataFrame,
                           metadata: dict) -> DataFrame:
        """
        Based on TA indicators, populates the buy signal for the given dataframe
        :param dataframe: DataFrame populated with indicators
        :param metadata: Additional information, like the currently traded pair
        :return: DataFrame with buy column
        """
        # dataframe.loc[
        #     (
        #         (qtpylib.crossed_above(dataframe['rsi'], 30)) &  # Signal: RSI crosses above 30
        #         (dataframe['tema'] <= dataframe['bb_middleband']) &  # Guard: tema below BB middle
        #         (dataframe['tema'] > dataframe['tema'].shift(1)) &  # Guard: tema is raising
        #         (dataframe['volume'] > 0)  # Make sure Volume is not 0
        #     ),
        #     'buy'] = 1
        action, nan_list = self.rl_model_predict(dataframe)
        dataframe.loc[action == 1, 'buy'] = 1
        dataframe.loc[nan_list, 'buy'] = 0
        return dataframe

    def populate_sell_trend(self, dataframe: DataFrame,
                            metadata: dict) -> DataFrame:
        """
        Based on TA indicators, populates the sell signal for the given dataframe
        :param dataframe: DataFrame populated with indicators
        :param metadata: Additional information, like the currently traded pair
        :return: DataFrame with sell column
        """
        # dataframe.loc[
        #     (
        #         (qtpylib.crossed_above(dataframe['rsi'], 70)) &  # Signal: RSI crosses above 70
        #         (dataframe['tema'] > dataframe['bb_middleband']) &  # Guard: tema above BB middle
        #         (dataframe['tema'] < dataframe['tema'].shift(1)) &  # Guard: tema is falling
        #         (dataframe['volume'] > 0)  # Make sure Volume is not 0
        #     ),
        #     'sell'] = 1
        action, nan_list = self.rl_model_predict(dataframe)
        dataframe.loc[action == 2, 'sell'] = 1
        dataframe.loc[nan_list, 'sell'] = 0
        return dataframe

    def rl_model_predict(self, dataframe):
        data = np.array(
            [
                dataframe['adx'],
                dataframe['plus_dm'],
                dataframe['plus_di'],
                dataframe['minus_dm'],
                dataframe['minus_di'],
                dataframe['aroonup'],
                dataframe['aroondown'],
                dataframe['aroonosc'],
                dataframe['ao'],
                # dataframe['kc_percent'],
                # dataframe['kc_width'],
                dataframe['uo'],
                dataframe['cci'],
                dataframe['rsi'],
                dataframe['fisher_rsi'],
                dataframe['slowd'],
                dataframe['slowk'],
                dataframe['fastd'],
                dataframe['fastk'],
                dataframe['fastd_rsi'],
                dataframe['fastk_rsi'],
                dataframe['macd'],
                dataframe['macdsignal'],
                dataframe['macdhist'],
                dataframe['mfi'],
                dataframe['roc'],
                # row['bb_percent'],
                # row['bb_width'],
                # row['wbb_percent'],
                # row['wbb_width'],
                # dataframe['htsine'],
                # dataframe['htleadsine'],
                # row['CDLHAMMER'],
                # row['CDLINVERTEDHAMMER'],
                # row['CDLDRAGONFLYDOJI'],
                # row['CDLPIERCING'],
                # row['CDLMORNINGSTAR'],
                # row['CDL3WHITESOLDIERS'],
                # row['CDLHANGINGMAN'],
                # row['CDLSHOOTINGSTAR'],
                # row['CDLGRAVESTONEDOJI'],
                # row['CDLDARKCLOUDCOVER'],
                # row['CDLEVENINGDOJISTAR'],
                # row['CDLEVENINGSTAR'],
                # row['CDL3LINESTRIKE'],
                # row['CDLSPINNINGTOP'],
                # row['CDLENGULFING'],
                # row['CDLHARAMI'],
                # row['CDL3OUTSIDE'],
                # row['CDL3INSIDE'],
                # trad_status,
                # (self.trade != None)
            ],
            dtype=np.float64)

        # Transpose so each row holds the 24 indicator values of one candle.
        data = data.T

        nan_list = np.isnan(data).any(axis=1)
        data = np.nan_to_num(data)
        action, _ = self.model.predict(data, deterministic=True)

        return action, nan_list
Code Example #11
                     eval_freq=10000,
                     deterministic=False,
                     best_model_save_path=savepath)
    ])
    if (os.path.exists("%s/best_model.zip" % savepath)):
        # Instantiate the agent
        model = ACER(policy,
                     env,
                     gamma=gamma,
                     n_steps=episodetimesteps,
                     learning_rate=LR,
                     buffer_size=10000,
                     verbose=1,
                     n_cpu_tf_sess=num_cpu)
        # Load the trained agent
        model = ACER.load("%s/best_model" % savepath, env=env)
        print('loaded agent')
        model.learn(
            total_timesteps=episodetimesteps**50, callback=callbacklist
        )  #total timesteps set to very large number so program will terminate based on runtime parameter)

    else:
        #create model with Stable Baselines package.
        model = ACER(policy,
                     env,
                     gamma=gamma,
                     n_steps=episodetimesteps,
                     learning_rate=LR,
                     buffer_size=10000,
                     verbose=1,
                     n_cpu_tf_sess=num_cpu)  #, tensorboard_log=scenario)
Code Example #12
scenario = f'{trialv}_{inputfile_s}_t{test}_lr{LR_s}_g{gamma_s}'
savepath = './output/%s' % scenario




for n in range(500):
    turns=round(random.random()*x*y*z*turnspc)
    env = environment(x,y,z,gamma, turnspc, policyname, rg_prob='loadenv')
    
    # Instantiate the agent
    model = ACER(policy, env, gamma=gamma, n_steps=episodetimesteps, learning_rate=LR,  buffer_size=10000,  verbose=1)
    
    # Load the trained agent
    model = ACER.load("%s/best_model" % savepath)
    
    # Evaluate the agent
    #mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
    
    # Enjoy trained agent
    obs = env.reset()
    for i in range(turns):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        #print(action, rewards, dones)
        #env.renderif('on')
        if dones:
            break
    env.save()
    #env.render()
Code Example #13
File: exp7.py  Project: sa-and/interventional_RL
        '8': 'data/fcms/gauss/8x0_5000.pkl'}
ep_lengths = {'6': 30,
              '8': 40}
runs_per_env = 1
collected_data = {'vars': [],
                  'env': [],
                  'run': [],
                  'algo': [],
                  'time': []}

run_experiment = True
analyze_experiment = True

if run_experiment:
    for var in vars:
        model = ACER.load(model_paths[var])
        envs = FCMGenerator.load_dataset(data[var])[:500]
        env_counter = 0
        print(var+' var environments')
        bar = tqdm(total=len(envs)*len(algos)*runs_per_env)
        for env in envs:
            # create fcm environment for our algo
            fcm_env = FCMEnvironment(agent=DiscreteAgent(int(var), env_type='Gauss'),
                                     fcm=env,
                                     eval_func=NoEval())
            # collect obs data for notears and GES
            obs_data = DataFrame(columns=['X'+str(i) for i in range(int(var))])
            for i in range(1000):
                inst = env.get_next_instantiation()[0]
                obs_data = obs_data.append({'X' + str(i): float(inst[i]) for i in range(len(inst))},
                                           ignore_index=True)
Code Example #14
    print('Usage: python play.py <env> <model> <agent_name>')
    sys.exit()

env_name = sys.argv[1]
model_type = sys.argv[2]
model_name = sys.argv[3] + '/agent'

env = gym.make(env_name)

if model_type == 'ppo1':
    from stable_baselines.common.policies import MlpPolicy
    from stable_baselines import PPO1
    model = PPO1.load(model_name)

elif model_type == 'dqn':
    from stable_baselines.deepq.policies import MlpPolicy
    from stable_baselines import DQN
    model = DQN.load(model_name)

elif model_type == 'acer':
    from stable_baselines.common.policies import MlpPolicy
    from stable_baselines import ACER
    model = ACER.load(model_name)

obs = env.reset()
done = False
while not done:
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    env.render()
Code Example #15
import matplotlib.pyplot as plt

from stable_baselines import ACER
from stable_baselines.common.cmd_util import make_atari_env
from stable_baselines.common.vec_env import VecFrameStack

# There already exists an environment generator
# that will make and wrap atari environments correctly.
# Here we are also multiprocessing training (num_env=4 => 4 processes)
env = make_atari_env('PongNoFrameskip-v4', num_env=4, seed=0)
# Frame-stacking with 4 frames
env = VecFrameStack(env, n_stack=4)

# model = ACER('CnnPolicy', env, verbose=1)
# model.learn(total_timesteps=25000)

# # save
# model.save("cnn_pong")

# load
model = ACER.load("cnn_pong")
print(model)

obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    print(rewards)
    # env.render()
    # env.render(mode='rgb_array')
    img = env.render(mode='rgb_array')
    plt.imshow(img)
    print(type(img))
    plt.show()
Code Example #16
def test_acer(name):
    model_path = os.path.join('models', name)
    model = ACER.load(model_path)
    return model
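
A usage sketch for test_acer, with a hypothetical filename; any saved ACER checkpoint under models/ would work:

# Expects models/pacman_acer.pkl on disk (placeholder name).
model = test_acer('pacman_acer.pkl')
print(model.action_space)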
Code Example #17
                     eval_freq=10000,
                     deterministic=False,
                     best_model_save_path=savepath)
    ])
    if (os.path.exists("%s/final_model.zip" % savepath)):
        # Instantiate the agent
        model = ACER(policy,
                     env,
                     gamma=gamma,
                     n_steps=episodetimesteps,
                     learning_rate=LR,
                     buffer_size=5000,
                     verbose=1,
                     n_cpu_tf_sess=num_cpu)
        # Load the trained agent
        model = ACER.load("%s/final_model" % savepath, env=env)
        print('loaded agent')
        save_evals()
        model.learn(
            total_timesteps=episodetimesteps**50, callback=callbacklist
        )  #total timesteps set to very large number so program will terminate based on runtime parameter)

    else:
        #create model with Stable Baselines package.
        model = ACER(policy,
                     env,
                     gamma=gamma,
                     n_steps=episodetimesteps,
                     learning_rate=LR,
                     buffer_size=5000,
                     verbose=1,
Code Example #18
    end_time = time.time()
    print('Training time for algorithm {}: {:.2f}s = {:.2f}min = {:.4f}hrs'.format(algo_list[alg],\
        end_time-start_time,(end_time-start_time)/60,(end_time-start_time)/3600))
    print('Trained using RL')
else: #test
    print('Testing {} learnt policy from model file {} for {} games!'.format(algo_list[alg],\
        args.model,int(args.num_test)))
    start_time = time.time()
    if alg == 0:
        model = TRPO.load(args.model)
    elif alg == 1:
        model = DQN.load(args.model)
    elif alg == 2:
        model = ACKTR.load(args.model)
    elif alg == 3:
        model = ACER.load(args.model)
    elif alg == 4:
        model = A2C.load(args.model)
    elif alg == 5:
        model = PPO1.load(args.model)
    env = gym.make('gym_pursuitevasion_small:pursuitevasion_small-v0')
    g = 1
    obs = env.reset(ep=g)
    e_win_games = int(0)
    while True:
        action, _states = model.predict(obs)
        obs, rewards, done, e_win = env.step(action)
        if done:
            g += 1
            obs = env.reset(ep=g)
            if g % 100 == 0: