Example No. 1
    def __init__(self, agent_spec, agent_additional_parameters, agents_count):
        self.agents = []
        first_agent = Agent.from_spec(spec=agent_spec,
                                      kwargs=agent_additional_parameters)
        self.agents.append(first_agent)
        self.model = first_agent.model
        self.stop = False
        for _ in range(agents_count - 1):
            agent = Agent.from_spec(spec=agent_spec,
                                    kwargs=agent_additional_parameters)
            agent.model.close()
            agent.model = self.model
            self.agents.append(agent)
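
The constructor above builds several agents from one spec and makes them all share the first agent's model (each additional agent's own model is closed and replaced). A minimal usage sketch, assuming the method belongs to a class called SharedModelAgentPool and that env and network_spec are already defined (all hypothetical names, not taken from the snippet):

# Hypothetical usage; the exact spec keys depend on the Tensorforce version in use.
agent_spec = dict(type='ppo_agent', batch_size=64)
agent_kwargs = dict(states=env.states, actions=env.actions, network=network_spec)

pool = SharedModelAgentPool(agent_spec=agent_spec,
                            agent_additional_parameters=agent_kwargs,
                            agents_count=4)
assert all(a.model is pool.model for a in pool.agents)  # every agent shares one model
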
    def restore_agent(self, path: str, model_path: str = None):
        """Deserialize the strategy's learning agent from a file.

        Arguments:
            path: The `str` path of the file the agent specification is stored in.
                The `.json` file extension will be automatically appended if not provided.
            model_path (optional): The `str` path of the file or directory the agent checkpoint is stored in.
                If not provided, the `model_path` will default to `{path_without_dot_json}/agent`.
        """
        path_with_ext = path if path.endswith('.json') else f'{path}.json'

        with open(path_with_ext) as json_file:
            spec = json.load(json_file)

            # json.load returns a plain dict, so read the spec entries by key
            self._agent_spec = spec['agent']
            self._network_spec = spec['network']

        self._agent = Agent.from_spec(spec=self._agent_spec,
                                      kwargs=dict(
                                          network=self._network_spec,
                                          states=self._environment.states,
                                          actions=self._environment.actions))

        path_without_ext = path_with_ext.replace('.json', '')
        model_path = model_path or f'{path_without_ext}/agent'

        self._agent.restore_model(file=model_path)

        self._runner = Runner(agent=self._agent, environment=self._environment)
    def __init__(self,
                 environment: TradingEnvironment,
                 agent_spec: Dict = None,
                 network_spec: Dict = None,
                 **kwargs):
        """
        Arguments:
            environment: A `TradingEnvironment` instance for the agent to trade within.
            agent_spec: A specification dictionary for the `Tensorforce` agent.
            network_spec: A specification dictionary for the `Tensorforce` agent's model network.
            kwargs (optional): Optional keyword arguments to adjust the strategy.
        """
        self._environment = environment

        self._max_episode_timesteps = kwargs.get('max_episode_timesteps', None)

        if agent_spec and network_spec:
            self._agent_spec = agent_spec
            self._network_spec = network_spec

            self._agent = Agent.from_spec(spec=agent_spec,
                                          kwargs=dict(
                                              network=network_spec,
                                              states=environment.states,
                                              actions=environment.actions))

            self._runner = Runner(agent=self._agent, environment=environment)
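
The two methods above (restore_agent and __init__) appear to belong to a trading-strategy wrapper around a Tensorforce agent. A hedged usage sketch, assuming the enclosing class is named TensorforceTradingStrategy and that environment, agent_spec and network_spec are already defined (hypothetical names, not taken from the snippet):

strategy = TensorforceTradingStrategy(environment=environment,
                                      agent_spec=agent_spec,
                                      network_spec=network_spec,
                                      max_episode_timesteps=500)

# Reload a previously saved specification and checkpoint:
strategy.restore_agent(path='agents/ppo_strategy')  # '.json' is appended automatically
# The checkpoint defaults to 'agents/ppo_strategy/agent' unless model_path is passed.
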
Example No. 4
    def __init__(self, env=None, device=None):
        self.env = env
        if self.env.saver.model_file_name == "":
            try:
                self.env.saver.model_file_name = self.env.model_name + "_" + self.env.dataDirectory.replace("/", "")
            except:
                self.env.saver.model_file_name = self.env.model_name + "_" + self.env.dataDirectory.replace("/", "")
            if not os.path.exists(self.env.saver.model_directory+ "/model"):
                os.mkdir(self.env.saver.model_directory+ "/model")
            self.env.saver.model_file_path = self.env.saver.model_directory + "/model/" + self.env.saver.model_file_name



        self.agent = Agents.from_spec(
            self.env.settings['agent'],
            kwargs=dict(
                states=self.env.states,
                actions=dict(type='int', num_actions=self.env.actions),
                network=self.env.settings['network'],
                device=device
            )
        )

        try:
            self.agent.restore_model(self.env.saver.model_directory+"/model")
        except:
            pass
Example No. 5
    def __init__(self, agent, environments):
        if not util.is_iterable(x=environments):
            raise TensorforceError.type(name='parallel-runner',
                                        argument='environments',
                                        value=environments)
        elif len(environments) == 0:
            raise TensorforceError.value(name='parallel-runner',
                                         argument='environments',
                                         value=environments)

        if not isinstance(agent, Agent):
            agent = Agent.from_spec(spec=agent,
                                    states=environments[0].states(),
                                    actions=environments[0].actions(),
                                    parallel_interactions=len(environments))

        if len(environments) > agent.parallel_interactions:
            raise TensorforceError(message="Too many environments.")

        self.agent = agent
        self.environments = tuple(environments)

        self.agent.initialize()
        self.global_episode = self.agent.episode
        self.global_timestep = self.agent.timestep
        self.episode_rewards = list()
        self.episode_timesteps = list()
        self.episode_times = list()
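
Here a plain spec dict passed as agent is promoted to an Agent via Agent.from_spec, with parallel_interactions set to the number of environments. A brief construction sketch; the class name ParallelRunner, the spec keys, and the environment class are assumptions that depend on the Tensorforce version:

environments = [OpenAIGym('CartPole-v0') for _ in range(4)]

# Passing a dict rather than an Agent instance takes the from_spec branch above.
runner = ParallelRunner(agent=dict(type='ppo_agent', batch_size=10),
                        environments=environments)
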
Example No. 6
def build(agent_spec, actor, env):
    agent = Agent.from_spec(spec=agent_spec,
                            kwargs=dict(states=env.states,
                                        actions=env.actions,
                                        network=actor))
    runner = Runner(agent=agent, environment=env, repeat_actions=1)
    return runner, agent
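
A short sketch of a possible call site for build; the spec keys and the env and actor values are assumptions, since the snippet only defines the factory:

# Hypothetical call site; exact spec keys depend on the Tensorforce version in use.
agent_spec = dict(type='ppo_agent', batch_size=64,
                  step_optimizer=dict(type='adam', learning_rate=1e-3))
actor = [dict(type='dense', size=64), dict(type='dense', size=64)]

runner, agent = build(agent_spec, actor, env)
runner.run(episodes=100)
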
Example No. 7
    def __init__(self, agent_type, network, action_type, preprocessor_type,
                 reward, tag):
        rf = reward_functions.__dict__[reward]
        super().__init__("-".join(
            [agent_type, network, action_type, reward, tag]))
        agent_spec = create_spec(action_type, agent_type, network)
        self._tf_agent = Agent.from_spec(agent_spec, {})
        self.action_translator = get_action_translator(action_type)
        self.preprocessor = get_observation_preprocessor(preprocessor_type)
Example No. 8
    def generate_tensorforce_agent(self):
        with open('tensorforce_configs/mlp2_128_network.json', 'r') as fp:
            network_spec = json.load(fp=fp)
        with open('tensorforce_configs/ppo.json', 'r') as fp:
            agent_config = json.load(fp=fp)
        tensorforce_agent = Agent.from_spec(
            spec=agent_config,
            kwargs=dict(
                states=self.environment.states,
                actions=self.environment.actions,
                network=network_spec,
            ))
        return tensorforce_agent
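
The two JSON files read above are not part of the example. As a rough illustration of what they typically contain in Tensorforce 0.x projects, the following sketch writes plausible stand-ins (the exact keys are assumptions):

import json

# Assumed contents only -- not the real tensorforce_configs/*.json files.
network_spec = [
    dict(type='dense', size=128, activation='relu'),
    dict(type='dense', size=128, activation='relu'),
]
agent_config = dict(type='ppo_agent', batch_size=64,
                    step_optimizer=dict(type='adam', learning_rate=1e-4))

with open('tensorforce_configs/mlp2_128_network.json', 'w') as fp:
    json.dump(network_spec, fp, indent=2)
with open('tensorforce_configs/ppo.json', 'w') as fp:
    json.dump(agent_config, fp, indent=2)
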
Example No. 9
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-i', '--config', help="Configuration file")

    args = parser.parse_args()
    print(args)
    sys.stdout.flush()

    if args.config is not None:
        with open(args.config, 'r') as fp:
            config = json.load(fp=fp)
    else:
        raise TensorforceError("No configuration provided.")

    if 'agent' not in config:
        raise TensorforceError("No agent configuration provided.")
    else:
        agent_config = config['agent']

    if 'network_spec' not in config:
        network_spec = None
        print("No network configuration provided.")
    else:
        network_spec = config['network_spec']

    if 'env' not in config:
        raise TensorforceError("No environment configuration provided.")
    else:
        env_config = config['env']

    environment = RecTableEnv(config)
    environment.set_up()

    agent_config['env'] = environment

    agent = Agent.from_spec(spec=agent_config,
                            kwargs=dict(
                                states_spec=environment.states,
                                actions_spec=environment.actions,
                                network_spec=network_spec,
                                batch_data=environment.get_input_tensor()))

    environment.set_session(agent.model.get_session())

    print("********** Configuration ************")
    for key, value in agent_config.items():
        print(str(key) + ": {}".format(value))

    agent.run_worker()
    agent.close()
Example No. 10
    def run_experiment(self, environment, experiment_num=0):
        config = copy(self.config)

        max_episodes = config.pop('max_episodes')
        max_episode_timesteps = config.pop('max_episode_timesteps')

        network_spec = config.pop('network')

        agent = Agent.from_spec(
            spec=config,
            kwargs=dict(
                states_spec=environment.states,
                actions_spec=environment.actions,
                network_spec=network_spec
            )
        )

        if experiment_num == 0 and self.history_data:
            logging.info("Attaching history data to runner")
            history_data = self.history_data
        else:
            history_data = None

        if experiment_num == 0 and self.load_model_file:
            logging.info("Loading model data from file: {}".format(self.load_model_file))
            agent.load_model(self.load_model_file)

        runner = Runner(
            agent=agent,
            environment=environment,
            repeat_actions=1,
            history=history_data
            # save_path=args.model,
            # save_episodes=args.save_model
        )

        environment.reset()
        agent.reset()

        runner.run(episodes=max_episodes, max_episode_timesteps=max_episode_timesteps,
                   episode_finished=self.episode_finished)

        return dict(
            initial_reset_time=0,
            episode_rewards=runner.episode_rewards,
            episode_timesteps=runner.episode_timesteps,
            episode_end_times=runner.episode_times
        )
Example No. 11
    def __init__(self, agent, environment, evaluation_environment=None):
        if not isinstance(agent, Agent):
            agent = Agent.from_spec(spec=agent,
                                    states=environment.states(),
                                    actions=environment.actions())

        self.agent = agent
        self.environment = environment
        self.evaluation_environment = evaluation_environment

        self.agent.initialize()
        self.global_episode = self.agent.episode
        self.global_timestep = self.agent.timestep
        self.episode_rewards = list()
        self.episode_timesteps = list()
        self.episode_times = list()
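
This is the single-environment counterpart of Example No. 5: a spec dict passed as agent is converted with Agent.from_spec, and the constructor then calls agent.initialize(). A brief sketch of both accepted forms (the environment construction and existing_agent are assumptions):

environment = OpenAIGym('CartPole-v0')

# Form 1: pass a ready Agent instance.
runner = Runner(agent=existing_agent, environment=environment)

# Form 2: pass a spec dict; the constructor builds the agent via Agent.from_spec.
runner = Runner(agent=dict(type='ppo_agent', batch_size=10), environment=environment)
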
Example No. 12
def main():

    env = EnvArDrone()
    restore_model_path = "./models"

    #Optional GPU usage configuration
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.25)

    #Network configuration
    network_spec = [
        dict(type='dense', size=64, activation='tanh'),
        dict(type='dense', size=64, activation='tanh')
    ]

    agent_file = "../configs/ppo.json"

    #Agent configuration file
    with open(agent_file, 'r') as fp:
        agent_config = json.load(fp=fp)

    #agent_config['execution']['session_config'] = tf.ConfigProto(gpu_options=gpu_options)

    agent = Agent.from_spec(spec=agent_config,
                            kwargs=dict(
                                states=env.states,
                                actions=env.actions,
                                network=network_spec,
                            ))

    if os.path.exists(restore_model_path):
        print("Restoring saved model....")
        agent.restore_model(directory=restore_model_path)

    print("Running model trained on {agent} for Environment '{env}'".format(
        agent=agent, env=env))

    #Running policy
    state = env.reset()
    try:
        while (True):
            actions = agent.act(state, deterministic=True)
            state, _, _ = env.execute(actions)
    except KeyboardInterrupt:
        print("Run finished")
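
The policy loop above discards the terminal flag returned by env.execute, so the environment is never reset between episodes. A hedged variant of the same loop that does reset, still assuming the old three-tuple return value (state, terminal, reward):

state = env.reset()
try:
    while True:
        actions = agent.act(state, deterministic=True)
        state, terminal, _ = env.execute(actions)
        if terminal:
            state = env.reset()  # start a fresh episode instead of acting on a stale state
except KeyboardInterrupt:
    print("Run finished")
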
Example No. 13
    def __init__(self, env, save_dir, deterministic=True):

        self.save_dir = save_dir
        self.env = env
        self.deterministic = deterministic

        # training spec
        with open(os.path.join(save_dir, 'training_spec.json'), 'r') as f:
            training_spec = json.load(f)

        self.repeat_actions = training_spec["repeat_actions"]
        self.momentum = training_spec["momentum"]
        self.max_episode_timesteps = training_spec["max_episode_timesteps"]
        self.num_agent_clones = env.gym.dog_count

        # network spec
        with open(os.path.join(save_dir, 'network_spec.json'), 'r') as f:
            network_spec = json.load(f)

        if os.path.exists(os.path.join(save_dir, 'preprocessing_spec.json')):
            with open(os.path.join(save_dir, 'preprocessing_spec.json'),
                      'r') as f:
                preprocessing_spec = json.load(f)
        else:
            preprocessing_spec = None

        # agent spec
        with open(os.path.join(save_dir, 'agent_spec.json'), 'r') as f:
            agent_spec = json.load(f)

        self.agent = Agent.from_spec(
            spec=agent_spec,
            kwargs=dict(states=env.states,
                        actions=env.actions,
                        network=network_spec,
                        states_preprocessing=preprocessing_spec))

        self.load_model()
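
self.load_model() is called but not shown in the snippet. A minimal sketch of what such a helper might do, assuming the checkpoint lives under the same save_dir (the method body is an assumption):

    def load_model(self):
        # Restore the latest Tensorforce checkpoint saved alongside the JSON specs.
        self.agent.restore_model(directory=self.save_dir)
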
Example No. 14
    def __init__(self, env=None, device=None):
        ''' Wrapper agent class '''

        self.env = env
        if not self.env.settings['agent']['type'] == "DEEP":
            # Hide TF loading logs
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
            from tensorforce.agents import Agent as Agents
            # Check if there is any existing model for this config
            # Make directory if config doesn't exist
            if self.env.saver.model_file_name == "":
                try:
                    self.env.saver.model_file_name = self.env.model_name
                except:
                    self.env.saver.model_file_name = self.env.model_name
                if not os.path.exists(self.env.saver.model_directory +
                                      "/model"):
                    os.mkdir(self.env.saver.model_directory + "/model")
                self.env.saver.model_file_path = self.env.saver.model_directory + "/model/"
            # Load agent from current config
            self.agent = Agents.from_spec(
                self.env.settings['agent'],
                kwargs=dict(states=self.env.states,
                            actions=dict(type='int',
                                         num_actions=self.env.actions),
                            network=self.env.settings['network'],
                            device=device))
            # Load agent if it already exists
            try:
                self.agent.restore_model(self.env.saver.model_file_path)
                print("agent loaded")
            except:
                print("agent not loaded")
                pass
        else:
            from .DEEP import DEEP
            # Load deep learning model (Keras model); only works in eval mode
            self.agent = DEEP("deep_model.h5")
Example No. 15
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '-P',
        '--port',
        default=6025,
        help="Port on which the UE4 Game listens for incoming RL-client connections"
    )
    parser.add_argument('-H',
                        '--host',
                        default=None,
                        help="Hostname of the UE4 Game (default: localhost)")
    parser.add_argument('-a',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-spec',
                        default=None,
                        help="Network specification file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=None,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--timesteps',
                        type=int,
                        default=None,
                        help="Number of timesteps")
    parser.add_argument('-m',
                        '--max-episode-timesteps',
                        type=int,
                        default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d',
                        '--deterministic',
                        action='store_true',
                        default=False,
                        help="Choose actions deterministically")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")
    parser.add_argument('-R',
                        '--random-test-run',
                        action="store_true",
                        help="Do a quick random test run on the env")

    args = parser.parse_args()

    #logging.basicConfig(filename="logfile.txt", level=logging.INFO)
    logging.basicConfig(stream=sys.stderr)
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # We have to connect this remote env to get the specs.
    # We also discretize axis-mappings b/c we will use a deep q-network.
    # Use num_ticks==6 to match Nature paper by Mnih et al.
    # ("human cannot press fire button with more than 10Hz", dt=1/60)
    # TODO: Need to build in capturing and concat'ing last 4 images (plus 8-bit conversion!) into 1 input state signal.
    # TODO: Use pre-processor for that.
    environment = UE4Environment(host=args.host,
                                 port=args.port,
                                 connect=True,
                                 discretize_actions=True,
                                 num_ticks=6)
    environment.seed(200)

    # Do a quick random test-run with image capture of the first n images -> then exit after 1000 steps.
    if args.random_test_run:
        # Reset the env.
        s = environment.reset()
        img = Image.fromarray(s, "RGB")
        # Save first received image as a sanity-check.
        img.save("reset.png")
        for i in range(1000):
            s, is_terminal, r = environment.execute(actions=random.choice(
                range(environment.actions["num_actions"])))
            if i < 10:
                img = Image.fromarray(s, "RGB")
                img.save("{:03d}.png".format(i))
            logging.debug("i={} r={} term={}".format(i, r, is_terminal))
            if is_terminal:
                environment.reset()
        quit()

    if args.agent_config is not None:
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network_spec is not None:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    agent = Agent.from_spec(spec=agent_config,
                            kwargs=dict(states_spec=environment.states,
                                        actions_spec=environment.actions,
                                        network_spec=network_spec))
    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.restore_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    if args.debug:  # TODO: Timestep-based reporting
        report_episodes = 1
    else:
        report_episodes = 100

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environment))

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            steps_per_second = r.timestep / (time.time() - r.start_time)
            logger.info(
                "Finished episode {} after {} timesteps. Steps Per Second {}".
                format(r.agent.episode, r.episode_timestep, steps_per_second))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) /
                min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) /
                min(100, len(r.episode_rewards))))
        return True

    runner.run(timesteps=args.timesteps,
               episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               episode_finished=episode_finished)

    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.agent.episode))
Example No. 16
import json

from tensorforce.agents import Agent
from tensorforce.execution import Runner
from tensorforce.contrib.openai_gym import OpenAIGym

# Create an OpenAIgym environment
env = OpenAIGym('Pendulum-v0', visualize=False)

network_path = './pendulum_ppo_network.json'
agent_path = './pendulum_ppo.json'
with open(network_path, 'r') as fp:
    network_spec = json.load(fp=fp)
with open(agent_path, 'r') as fp:
    agent_config = json.load(fp=fp)
agent = Agent.from_spec(spec=agent_config,
                        kwargs=dict(states=env.states,
                                    actions=env.actions,
                                    network=network_spec))

# Create the runner
runner = Runner(agent=agent, environment=env)


# Callback function printing episode statistics
def episode_finished(r):
    print(
        "Finished episode {ep} after {ts} timesteps (reward: {reward})".format(
            ep=r.episode, ts=r.episode_timestep, reward=r.episode_rewards[-1]))
    return True


# Start learning
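
The snippet ends right after the "# Start learning" comment, before the actual training call. A hedged completion showing how the runner is typically started in Tensorforce 0.x examples (the episode count is an arbitrary assumption):

runner.run(episodes=300, max_episode_timesteps=200, episode_finished=episode_finished)
runner.close()

print("Learning finished. Total episodes: {ep}".format(ep=runner.agent.episode))
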
Example No. 17
def main():
    theMarket = Market(
        data_path="data/%s_Candlestick_4_Hour_BID_01.08.2018-30.11.2018.csv" %
        CURRENCY_PAIR)  #, indicators={'ADX': 12})
    MyRecord = Record()
    MyOrderManager = OrderManager(market=theMarket, record=MyRecord)
    MyTrader = SureFireTrader(orderManager=MyOrderManager)

    SLTP_pips = [20, 25, 30]
    start_order_type = ['BUY', 'SELL']
    max_level_limit = [2, 3, 4]
    window_size = 12

    # Create a RL agent
    with open("config/%s.json" % AGENT_METHOD, 'r') as fp:
        agent_config = json.load(fp=fp)
    with open("config/conv2d.json", 'r') as fp:
        network_config = json.load(fp=fp)
    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states=dict(type='float', shape=(window_size, window_size,
                                             4)),  #[Open, High, Low, Close]
            actions=dict(
                SLTP_pips=dict(type='int',
                               num_actions=len(SLTP_pips)),  #[20,25,30]
                start_order_type=dict(
                    type='int',
                    num_actions=len(start_order_type)),  #['BUY','SELL']
                max_level_limit=dict(
                    type='int', num_actions=len(max_level_limit))  #[2,3,4,5]
            ),
            network=network_config))
    if not os.path.exists("save_model/%s/trades" % AGENT_METHOD):
        os.makedirs("save_model/%s/trades" % AGENT_METHOD)

    reward_history = []
    for episode in trange(100 + 1, ascii=True):

        profit_history = []
        this_reward_history = []
        idle_count = 0
        round_count = 0
        episode_end = False
        max_idle_limit = 12  #future action
        MyRecord.reset()
        MyOrderManager.reset()
        theMarket.reset(start_index=window_size)

        pbar = tqdm()
        while (theMarket.next()):  #main loop, essential

            pbar.update(1)  # simple-GUI

            ################### ROUTINES ###################
            MyOrderManager.orders_check()  #routine, after market.next
            trade_status, other_detail = MyTrader.status_check(
            )  #routine, after orders_check
            ################################################

            ################### GET STATE ##################
            ohlc = theMarket.get_ohlc(size=window_size)
            indicators = theMarket.get_indicators(size=window_size)
            O, H, L, C = gaf_encode(ohlc['Open']), gaf_encode(ohlc['High']), \
                            gaf_encode(ohlc['Low']),gaf_encode(ohlc['Close'])
            #ADX = gaf_encode(indicators['ADX'])
            state = np.stack((O, H, L, C), axis=-1)
            ################################################

            ################## TAKE ACTION #################
            if trade_status == 'TRADE_OVER':

                ############ GET REWARD & TRAIN ################
                if theMarket.get_current_index() > window_size:
                    '''
                    profit = sum(round(order['profit'],5) for order in other_detail if order['profit']>0)
                    loss = sum(round(order['profit'],5) for order in other_detail if order['profit']<0)
                    
                    this_profit_factor = MyRecord.get_profit_factor()
                    this_trade_length = len(MyRecord.get_history())
                    reward = this_profit_factor*np.sqrt(this_trade_length)#SQN
                    '''
                    raw_reward = (MyRecord.get_net_profit() -
                                  profit_history[-1]) / theMarket.get_pip()
                    penalty = 1.0 - 0.1 * len(other_detail)
                    if raw_reward > 0:
                        reward = raw_reward * penalty
                    else:
                        if len(other_detail) == 0:
                            reward = 0
                        else:
                            reward = -np.abs(other_detail[0]['TP'] -
                                             other_detail[0]['price']
                                             ) / theMarket.get_pip()

                    if theMarket.get_current_index(
                    ) >= theMarket.get_data_length(
                    ) - max_idle_limit * max_level_limit[-1]:
                        episode_end = True
                    agent.observe(
                        reward=reward, terminal=episode_end
                    )  # Add experience, agent automatically updates model according to batch size
                    this_reward_history.append(reward)
                    if episode_end == True:
                        if episode % 100 == 0:
                            this_dir = 'save_model/%s/%04d' % (AGENT_METHOD,
                                                               episode)
                            if not os.path.exists(this_dir):
                                os.makedirs(this_dir)
                            agent.save_model(this_dir + '/model')
                        pbar.close()
                        reward_history.append(this_reward_history)
                        with open(
                                'save_model/%s/trades/episode_%04d.pkl' %
                            (AGENT_METHOD, episode), 'wb') as f:
                            pickle.dump(MyRecord.get_history(), f, protocol=-1)
                        break
                action = agent.act(state)  # Get prediction from agent, execute
                SL_pip = SLTP_pips[action['SLTP_pips']] * 2
                TP_pip = SLTP_pips[action['SLTP_pips']]
                MyTrader.set_max_level(
                    max_level_limit[action['max_level_limit']])
                first_order_type = start_order_type[action['start_order_type']]
                ################################################

                MyTrader.new_trade(SL_pip=SL_pip,
                                   TP_pip=TP_pip,
                                   start_order_type=first_order_type)

                round_count += 1
                idle_count = 0
                logging.info(
                    "NewTradeStarted: current net profit=%f (price@%f)" %
                    (MyRecord.get_net_profit(), theMarket.get_market_price()))

            elif trade_status == 'ADD_ORDER':
                last_order = MyTrader.get_orders_detail()[-1]
                if last_order['order_type'] == 'BUY':
                    price = last_order['price'] - theMarket.get_pip(TP_pip)
                elif last_order['order_type'] == 'SELL':
                    price = last_order['price'] + theMarket.get_pip(TP_pip)
                MyTrader.add_reverse_order(price=price,
                                           SL_pip=SL_pip,
                                           TP_pip=TP_pip)
                idle_count = 0

            elif trade_status == 'ERROR':
                logging.warning("SureFireError: order issues...")

            elif trade_status == 'NONE':
                idle_count += 1
                if idle_count >= max_idle_limit:

                    ############ GET REWARD & TRAIN ################
                    '''
                    profit = sum(round(order['profit'],5) for order in other_detail if order['profit']>0)
                    loss = sum(round(order['profit'],5) for order in other_detail if order['profit']<0)
                    
                    this_profit_factor = MyRecord.get_profit_factor()
                    this_trade_length = len(MyRecord.get_history())
                    reward = this_profit_factor*np.sqrt(this_trade_length)#SQN
                    '''
                    raw_reward = (MyRecord.get_net_profit() -
                                  profit_history[-1]) / theMarket.get_pip()
                    penalty = 1.0 - 0.1 * len(other_detail)
                    if raw_reward > 0:
                        reward = raw_reward * penalty
                    else:
                        if len(other_detail) == 0:
                            reward = 0
                        else:
                            reward = -np.abs(other_detail[0]['TP'] -
                                             other_detail[0]['price']
                                             ) / theMarket.get_pip()

                    if theMarket.get_current_index(
                    ) >= theMarket.get_data_length(
                    ) - max_idle_limit * max_level_limit[-1]:
                        episode_end = True
                    agent.observe(
                        reward=reward, terminal=episode_end
                    )  # Add experience, agent automatically updates model according to batch size
                    this_reward_history.append(reward)
                    if episode_end == True:
                        if episode % 100 == 0:
                            this_dir = 'save_model/%s/%04d' % (AGENT_METHOD,
                                                               episode)
                            if not os.path.exists(this_dir):
                                os.makedirs(this_dir)
                            agent.save_model(this_dir + '/model')
                        pbar.close()
                        reward_history.append(this_reward_history)
                        with open(
                                'save_model/%s/trades/episode_%04d.pkl' %
                            (AGENT_METHOD, episode), 'wb') as f:
                            pickle.dump(MyRecord.get_history(), f, protocol=-1)
                        break

                    action = agent.act(
                        state)  # Get prediction from agent, execute
                    SL_pip = SLTP_pips[action['SLTP_pips']] * 2
                    TP_pip = SLTP_pips[action['SLTP_pips']]
                    MyTrader.set_max_level(
                        max_level_limit[action['max_level_limit']])
                    first_order_type = start_order_type[
                        action['start_order_type']]
                    ################################################

                    MyTrader.new_trade(SL_pip=SL_pip,
                                       TP_pip=TP_pip,
                                       start_order_type=first_order_type)
                    idle_count = 0
                    logging.info(
                        "NewTradeStarted: current net profit=%f (price@%f)" %
                        (MyRecord.get_net_profit(),
                         theMarket.get_market_price()))
            ################################################

            profit_history.append(MyRecord.get_net_profit())  #for plotting

        #MyRecord.show_details()
        #print("Rounds of Tradings: %d\n"%round_count)

    #with open('save_model/%s/trades/profit_history.pkl'%AGENT_METHOD, 'wb') as f:
    #pickle.dump(profit_history,f,protocol=-1)

    with open('save_model/%s/trades/reward_history.pkl' % AGENT_METHOD,
              'wb') as f:
        pickle.dump(reward_history, f, protocol=-1)
Example No. 18
def main():
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    console_handler = logging.StreamHandler()
    console_handler.setFormatter(
        logging.Formatter("%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s"))
    logger.addHandler(console_handler)

    parser = argparse.ArgumentParser()

    parser.add_argument('-a', '--agent-config', help="Agent configuration file")
    parser.add_argument('-v', '--vizdoom-config', help="VizDoom configuration file")
    parser.add_argument('-n', '--network-spec', default=None, help="Network specification file")

    args = parser.parse_args()

    if args.agent_config is not None:
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network_spec is not None:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    if network_spec[0]['type'] == 'conv2d':
        agent_config['states_preprocessing'] = [{'type': 'expand_dims',
                                                 'axis': -1}]
    else:
        agent_config['states_preprocessing'] = [{'type': 'flatten'}]

    logger.info("Start training")

    environment = ViZDoom(args.vizdoom_config)

    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states=environment.states,
            actions=environment.actions,
            network=network_spec,
        )
    )

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1
    )

    def episode_finished(r):
        if r.episode % 100 == 0:
            sps = r.timestep / (time.time() - r.start_time)
            logger.info("Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}".format(ep=r.episode,
                                                                                                    ts=r.timestep,
                                                                                                    sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Episode timesteps: {}".format(r.episode_timestep))
            logger.info("Average of last 500 rewards: {}".format(sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(sum(r.episode_rewards[-100:]) / 100))
        return True

    runner.run(
        timesteps=6000000,
        episodes=1000,
        max_episode_timesteps=10000,
        deterministic=False,
        episode_finished=episode_finished
    )

    terminal = False
    state = environment.reset()
    while not terminal:
        action = agent.act(state)
        state, terminal, reward = environment.execute(action)

    runner.close()
Example No. 19
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="Id of the Gym environment")
    parser.add_argument('-a', '--agent', help="Agent configuration file")
    parser.add_argument('-n',
                        '--network',
                        default=None,
                        help="Network specification file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=None,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--timesteps',
                        type=int,
                        default=None,
                        help="Number of timesteps")
    parser.add_argument('-m',
                        '--max-episode-timesteps',
                        type=int,
                        default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d',
                        '--deterministic',
                        action='store_true',
                        help="Choose actions deterministically")
    parser.add_argument('-M',
                        '--mode',
                        choices=('tmux', 'child'),
                        default='tmux',
                        help="Starter mode")
    parser.add_argument('-W',
                        '--num-workers',
                        type=int,
                        default=1,
                        help="Number of worker agents")
    parser.add_argument('-C',
                        '--child',
                        action='store_true',
                        help="Child process")
    parser.add_argument('-P',
                        '--parameter-server',
                        action='store_true',
                        help="Parameter server")
    parser.add_argument('-I',
                        '--task-index',
                        type=int,
                        default=0,
                        help="Task index")
    parser.add_argument('-K',
                        '--kill',
                        action='store_true',
                        help="Kill runners")
    parser.add_argument('-L',
                        '--logdir',
                        default='logs_async',
                        help="Log directory")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        help="Show debug outputs")

    args = parser.parse_args()

    session_name = 'OpenAI-' + args.gym_id
    shell = '/bin/bash'

    kill_cmds = [
        "kill $( lsof -i:12222-{} -t ) > /dev/null 2>&1".format(
            12222 + args.num_workers),
        "tmux kill-session -t {}".format(session_name),
    ]
    if args.kill:
        os.system("\n".join(kill_cmds))
        return 0

    if not args.child:
        # start up child processes
        target_script = os.path.abspath(inspect.stack()[0][1])

        def wrap_cmd(session, name, cmd):
            if isinstance(cmd, list):
                cmd = ' '.join(shlex_quote(str(arg)) for arg in cmd)
            if args.mode == 'tmux':
                return 'tmux send-keys -t {}:{} {} Enter'.format(
                    session, name, shlex_quote(cmd))
            elif args.mode == 'child':
                return '{} > {}/{}.{}.out 2>&1 & echo kill $! >> {}/kill.sh'.format(
                    cmd, args.logdir, session, name, args.logdir)

        def build_cmd(ps, index):
            cmd_args = [
                'CUDA_VISIBLE_DEVICES=', sys.executable, target_script,
                args.gym_id, '--agent',
                os.path.join(os.getcwd(), args.agent), '--network',
                os.path.join(os.getcwd(), args.network), '--num-workers',
                args.num_workers, '--child', '--task-index', index
            ]
            if args.episodes is not None:
                cmd_args.append('--episodes')
                cmd_args.append(args.episodes)
            if args.timesteps is not None:
                cmd_args.append('--timesteps')
                cmd_args.append(args.timesteps)
            if args.max_episode_timesteps is not None:
                cmd_args.append('--max-episode-timesteps')
                cmd_args.append(args.max_episode_timesteps)
            if args.deterministic:
                cmd_args.append('--deterministic')
            if ps:
                cmd_args.append('--parameter-server')
            if args.debug:
                cmd_args.append('--debug')
            return cmd_args

        if args.mode == 'tmux':
            cmds = kill_cmds + [
                'tmux new-session -d -s {} -n ps'.format(session_name)
            ]
        elif args.mode == 'child':
            cmds = [
                'mkdir -p {}'.format(args.logdir),
                'rm -f {}/kill.sh'.format(args.logdir),
                'echo "#!/bin/bash" > {}/kill.sh'.format(args.logdir),
                'chmod +x {}/kill.sh'.format(args.logdir)
            ]

        cmds.append(wrap_cmd(session_name, 'ps', build_cmd(ps=True, index=0)))

        for i in range(args.num_workers):
            name = 'worker{}'.format(i)
            if args.mode == 'tmux':
                cmds.append('tmux new-window -t {} -n {} -d {}'.format(
                    session_name, name, shell))
            cmds.append(
                wrap_cmd(session_name, name, build_cmd(ps=False, index=i)))

        # add one PS call
        # cmds.append('tmux new-window -t {} -n ps -d {}'.format(session_name, shell))

        print("\n".join(cmds))

        os.system("\n".join(cmds))

        return 0

    ps_hosts = ['127.0.0.1:{}'.format(12222)]
    worker_hosts = []
    port = 12223
    for _ in range(args.num_workers):
        worker_hosts.append('127.0.0.1:{}'.format(port))
        port += 1
    cluster = {'ps': ps_hosts, 'worker': worker_hosts}
    cluster_spec = tf.train.ClusterSpec(cluster)

    environment = OpenAIGym(args.gym_id)

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)  # log_levels[agent.log_level])

    if args.agent is not None:
        with open(args.agent, 'r') as fp:
            agent = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network is not None:
        with open(args.network, 'r') as fp:
            network = json.load(fp=fp)
    else:
        network = None
        logger.info("No network configuration provided.")

    if args.parameter_server:
        agent['device'] = '/job:ps/task:{}'.format(args.task_index)  # '/cpu:0'
    else:
        agent['device'] = '/job:worker/task:{}'.format(
            args.task_index)  # '/cpu:0'

    agent['execution'] = dict(
        type='distributed',
        distributed_spec=dict(cluster_spec=cluster_spec,
                              task_index=args.task_index,
                              job='ps' if args.parameter_server else 'worker',
                              protocol='grpc'))

    agent = Agent.from_spec(spec=agent,
                            kwargs=dict(states=environment.states,
                                        actions=environment.actions,
                                        network=network))

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(
        gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(agent)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    if args.debug:  # TODO: Timestep-based reporting
        report_episodes = 1
    else:
        report_episodes = 100

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            steps_per_second = r.timestep / (time.time() - r.start_time)
            logger.info(
                "Finished episode {} after overall {} timesteps. Steps Per Second {}"
                .format(r.agent.episode, r.agent.timestep, steps_per_second))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) /
                min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) /
                min(100, len(r.episode_rewards))))
        return True

    runner.run(timesteps=args.timesteps,
               episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               episode_finished=episode_finished)
    runner.close()
Example No. 20
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--mode', help="ID of the game mode")
    parser.add_argument('--hide',
                        dest='hide',
                        action='store_const',
                        const=True,
                        default=False,
                        help="Hide output window")
    parser.add_argument('-a',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-spec',
                        default=None,
                        help="Network specification file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=50000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # configurable!!!

    environment = MazeExplorer(mode_id=args.mode, visible=not args.hide)

    if args.agent_config is not None:
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network_spec is not None:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    agent = Agent.from_spec(spec=agent_config,
                            kwargs=dict(states_spec=environment.states,
                                        actions_spec=environment.actions,
                                        network_spec=network_spec))

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.restore_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {} ()".format(save_dir))

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    report_episodes = args.episodes // 1000
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            sps = r.total_timesteps / (time.time() - r.start_time)
            logger.info(
                "Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}"
                .format(ep=r.episode, ts=r.timestep, sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / 100))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
    runner.close()
    logger.info(
        "Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    environment.close()
Example No. 21
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('rom', help="File path of the rom")
    parser.add_argument('-a', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-spec', default=None, help="Network specification file")
    parser.add_argument('-fs', '--frame-skip', help="Number of frames to repeat action", type=int, default=1)
    parser.add_argument('-rap', '--repeat-action-probability', help="Repeat action probability", type=float, default=0.0)
    parser.add_argument('-lolt', '--loss-of-life-termination', help="Loss of life counts as terminal state", action='store_true')
    parser.add_argument('-lolr', '--loss-of-life-reward', help="Loss of life reward/penalty. EX: -1 to penalize", type=float, default=0.0)
    parser.add_argument('-ds', '--display-screen', action='store_true', default=False, help="Display emulator screen")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # configurable!!!
    logger.addHandler(logging.StreamHandler(sys.stdout))

    environment = ALE(args.rom, frame_skip=args.frame_skip,
                      repeat_action_probability=args.repeat_action_probability,
                      loss_of_life_termination=args.loss_of_life_termination,
                      loss_of_life_reward=args.loss_of_life_reward,
                      display_screen=args.display_screen)

    if args.agent_config is not None:
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network_spec is not None:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    agent = Agent.from_spec(
        spec=agent_config,
        kwargs=dict(
            states=environment.states,
            actions=environment.actions,
            network=network_spec
        )
    )

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {} ()".format(save_dir))

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1
    )

    report_episodes = args.episodes // 1000
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            sps = r.timestep / (time.time() - r.start_time)
            logger.info("Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}".format(ep=r.episode, ts=r.timestep, sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(sum(r.episode_rewards[-100:]) / 100))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    runner.close()
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    environment.close()
Example No. 22
                            monitor_video=False,
                            visualize=True)
    print(os.getcwd())
    with open(
            '/root/catkin_ws/src/drone_training/drone_training/configs/dqn_ue4.json',
            'r') as fp:
        agent = json.load(fp=fp)

    with open(
            '/root/catkin_ws/src/drone_training/drone_training/configs/mynet.json',
            'r') as fp:
        network = json.load(fp=fp)

    agent = Agent.from_spec(spec=agent,
                            kwargs=dict(
                                states=environment.states,
                                actions=environment.actions,
                                network=network,
                            ))
    #    if rospy.get_param("/load"):
    #        load_dir = os.path.dirname(rospy.get_param("/load"))
    #        if not os.path.isdir(load_dir):
    #            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
    #        agent.restore_model(rospy.get_param("/load"))

    if rospy.get_param("/save"):
        save_dir = os.path.dirname(rospy.get_param("/save"))
        if not os.path.isdir(rospy.get_param("/save")):
            try:
                os.mkdir(rospy.get_param("/save"), 0o755)
            except OSError:
                print(rospy.get_param("/save"))
Example No. 23
    def __init__(self, arguments):
        self.args = arguments
        if not os.path.isdir("tmp"):
            try:
                os.mkdir("tmp", 0o755)
            except OSError:
                raise OSError("Cannot create directory `tmp`")
        self.logger = get_logger(filename='tmp/train.log',
                                 logger_name='TrainLogger')
        self.logger.debug(self.args)

        self.train, self.scaler = self._get_data()

        # build environment
        self.environment = PortfolioEnv(
            self.train,
            nb_assets=config.NB_ASSETS,
            horizon=self.args.horizon,
            window_size=config.WINDOW_SIZE,
            portfolio_value=config.PORTFOLIO_VALUE,
            assets=config.ASSETS,
            risk_aversion=config.RISK_AVERSION,
            scaler=self.scaler,
            predictor=config.PREDICTION_MODEL,
            cost_buying=config.COST_BUYING,
            cost_selling=config.COST_SELLING,
            action_type=self.args.action_type,
            action_space=self.args.action_space,
            optimized=True,
            num_actions=self.args.num_actions,
            discrete_states=self.args.discrete_states,
            standardize=self.args.standardize_state,
            episodes=self.args.episodes,
            epochs=self.args.epochs,
            random_starts=self.args.random_starts)

        # load agent config
        with open(self.args.agent_config, 'r') as fp:
            self.agent_config = json.load(fp=fp)

        # load network config
        if self.args.net_config:
            with open(self.args.net_config, 'r') as fp:
                self.network_spec = json.load(fp=fp)
        else:
            self.network_spec = None

        try:
            print(f'Agent spec: {self.agent_config}'
                  f'\nNetwork spec: {self.network_spec}'
                  f'\nEnvironment spec: {self.environment.env_spec()}\n')

            self.logger.info(
                f'\nAgent spec: {self.agent_config}'
                f'\nNetwork spec: {self.network_spec}'
                f'\nEnvironment spec: {self.environment.env_spec()}\n')
        except Exception:
            # in case of using one of the basic agents
            pass

        # check if the agent can be saved
        if self.args.model_path:
            save_dir = os.path.dirname(self.args.model_path)
            if not os.path.isdir(save_dir):
                try:
                    os.mkdir(save_dir, 0o755)
                except OSError:
                    raise OSError(
                        "Cannot save agent to dir {}".format(save_dir))

        # check if training evaluation files can be saved
        if self.args.eval_path:
            save_dir = os.path.dirname(self.args.eval_path)
            if not os.path.isdir(save_dir):
                try:
                    os.mkdir(save_dir, 0o755)
                except OSError:
                    raise OSError(
                        "Cannot save evaluation to dir {}".format(save_dir))

        if self.agent_config['type'] == 'trpo_agent':
            # TRPO currently fails when TensorBoard summaries are enabled,
            # so the agent is built without a summary_spec
            self.logger.warning('TensorBoard will not be supported')

            # build agent
            self.agent = Agent.from_spec(
                spec=self.agent_config,
                kwargs=dict(states_spec=self.environment.states,
                            actions_spec=self.environment.actions,
                            network_spec=self.network_spec))

        else:
            # summary spec for TensorBoard
            self.summary_spec = dict(directory="./board/",
                                     steps=50,
                                     labels=[
                                         'regularization', 'losses',
                                         'variables', 'actions', 'states',
                                         'rewards', 'gradients',
                                         'configuration'
                                     ])

            # build agent
            self.agent = Agent.from_spec(
                spec=self.agent_config,
                kwargs=dict(states_spec=self.environment.states,
                            actions_spec=self.environment.actions,
                            network_spec=self.network_spec,
                            summary_spec=self.summary_spec))

        # if there is a pre-trained agent -> continue training
        if self.args.load_agent:
            load_dir = os.path.dirname(self.args.load_agent)
            if not os.path.isdir(load_dir):
                raise OSError(
                    "Could not load agent from {}: No such directory.".format(
                        load_dir))
            self.agent.restore_model(self.args.load_agent)
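
    # Hypothetical train() method (not part of the original snippet): the constructor above
    # builds the environment and agent but the training loop itself is not shown in this
    # listing. A rough sketch using the same Runner pattern as the other examples here:
    def train(self):
        from tensorforce.execution import Runner

        runner = Runner(agent=self.agent, environment=self.environment)

        def episode_finished(r):
            # Running average over the last (up to) 100 episodes
            avg_100 = sum(r.episode_rewards[-100:]) / min(100, len(r.episode_rewards))
            self.logger.info('Episode {}: reward {:+.2f}, avg(100) {:+.2f}'.format(
                r.episode, r.episode_rewards[-1], avg_100))
            return True

        runner.run(num_episodes=self.args.episodes,
                   episode_finished=episode_finished)
        runner.close()

        # Persist the trained model if a save path was configured
        if self.args.model_path:
            self.agent.save_model(self.args.model_path)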
Example No. 24
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-spec',
                        default=None,
                        help="Network specification file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=50000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000 * 60,
                        help="Maximum number of timesteps per episode")
    # parser.add_argument('-m', '--monitor', help="Save results to this directory")
    # parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    # parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    environment = OpenAIUniverse(args.gym_id)
    environment.configure(remotes=1)

    if args.agent_config is not None:
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network_spec:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    agent = Agent.from_spec(spec=agent_config,
                            kwargs=dict(states_spec=environment.states,
                                        actions_spec=environment.actions,
                                        network_spec=network_spec))

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.restore_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    # Guard against a zero interval when fewer than 1000 episodes are requested
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            steps_per_second = r.timestep / (time.time() - r.start_time)
            logger.info(
                "Finished episode {} after {} timesteps. Steps Per Second {}".
                format(r.episode, r.episode_timestep, steps_per_second))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(
                sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(
                sum(r.episode_rewards[-100:]) / 100))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environment))
    runner.run(args.episodes,
               args.max_timesteps,
               episode_finished=episode_finished)
    runner.close()
    logger.info(
        "Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    # if args.monitor:
    #     environment.gym.monitor.close()
    environment.close()
Example No. 25
def load_agent(agent_folder):
    with open(os.path.join(agent_folder, "agent.json"), 'r') as fp:
        agent_spec = json.load(fp=fp)
    return Agent.from_spec(agent_spec)
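
# Note: load_agent() above restores only the agent specification. Elsewhere in this
# listing, Agent.from_spec is also given states, actions and a network via `kwargs`.
# A hypothetical, more complete loader (the network.json companion file and the
# `environment` argument are assumptions for illustration; it relies on the same
# os/json/Agent imports as the snippet above) might look like:
def load_agent_for_environment(agent_folder, environment):
    with open(os.path.join(agent_folder, "agent.json"), 'r') as fp:
        agent_spec = json.load(fp=fp)

    network_spec = None
    network_path = os.path.join(agent_folder, "network.json")
    if os.path.isfile(network_path):
        with open(network_path, 'r') as fp:
            network_spec = json.load(fp=fp)

    return Agent.from_spec(spec=agent_spec,
                           kwargs=dict(states=environment.states,
                                       actions=environment.actions,
                                       network=network_spec))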
Example No. 26
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('rom', help="File path of the rom")
    parser.add_argument('-a',
                        '--agent-config',
                        help="Agent configuration file")
    parser.add_argument('-n',
                        '--network-spec',
                        default=None,
                        help="Network specification file")
    parser.add_argument(
        '-w',
        '--workers',
        help="Number of threads to run where the model is shared",
        type=int,
        default=16)
    parser.add_argument('-fs',
                        '--frame-skip',
                        help="Number of frames to repeat action",
                        type=int,
                        default=1)
    parser.add_argument('-rap',
                        '--repeat-action-probability',
                        help="Repeat action probability",
                        type=float,
                        default=0.0)
    parser.add_argument('-lolt',
                        '--loss-of-life-termination',
                        help="Loss of life counts as terminal state",
                        action='store_true')
    parser.add_argument('-lolr',
                        '--loss-of-life-reward',
                        help="Loss of life reward/penalty. EX: -1 to penalize",
                        type=float,
                        default=0.0)
    parser.add_argument(
        '-ea',
        '--epsilon-annealing',
        help='Create separate epislon annealing schedules per thread',
        action='store_true')
    parser.add_argument('-ds',
                        '--display-screen',
                        action='store_true',
                        default=False,
                        help="Display emulator screen")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=50000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # configurable!!!
    logger.addHandler(logging.StreamHandler(sys.stdout))

    environments = [
        ALE(args.rom,
            frame_skip=args.frame_skip,
            repeat_action_probability=args.repeat_action_probability,
            loss_of_life_termination=args.loss_of_life_termination,
            loss_of_life_reward=args.loss_of_life_reward,
            display_screen=args.display_screen) for _ in range(args.workers)
    ]

    if args.network_spec:
        with open(args.network_spec, 'r') as fp:
            network_spec = json.load(fp=fp)
    else:
        network_spec = None
        logger.info("No network configuration provided.")

    agent_configs = []
    for i in range(args.workers):
        # Load a separate copy of the agent configuration file for each worker
        with open(args.agent_config, 'r') as fp:
            agent_config = json.load(fp=fp)

        # Optionally overwrite epsilon final values
        if "exploration" in agent_config and agent_config["exploration"][
                "type"] == "epsilon_anneal":
            if args.epsilon_annealing:
                # epsilon final values are [0.5, 0.1, 0.01] with probabilities [0.3, 0.4, 0.3]
                epsilon_final = np.random.choice([0.5, 0.1, 0.01],
                                                 p=[0.3, 0.4, 0.3])
                agent_config["exploration"]["epsilon_final"] = epsilon_final

        agent_configs.append(agent_config)

    # Let the first agent create the model
    # Manually assign model
    logger.info(agent_configs[0])

    agent = Agent.from_spec(spec=agent_configs[0],
                            kwargs=dict(states_spec=environments[0].states,
                                        actions_spec=environments[0].actions,
                                        network_spec=network_spec))

    agents = [agent]

    for i in range(args.workers - 1):
        config = agent_configs[i + 1]
        # Use the first agent's config to fill in any keys missing from this worker's config
        for key, value in agent_configs[0].items():
            config.setdefault(key, value)

        worker = WorkerAgentGenerator(AgentsDictionary[config['type']])(
            states_spec=environments[0].states,
            actions_spec=environments[0].actions,
            network_spec=network_spec,
            model=agent.model,
            kwargs=config)
        agents.append(worker)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.restore_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_configs[0])

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {}".format(save_dir))

    def episode_finished(stats):
        if args.debug:
            logger.info(
                "Thread {t}. Finished episode {ep} after {ts} timesteps. Reward {r}"
                .format(t=stats['thread_id'],
                        ep=stats['episode'],
                        ts=stats['timestep'],
                        r=stats['episode_reward']))
        return True

    def summary_report(r):
        et = time.time()
        logger.info('=' * 40)
        logger.info('Current Step/Episode: {}/{}'.format(
            r.global_step, r.global_episode))
        logger.info('SPS: {}'.format(r.global_step / (et - r.start_time)))
        reward_list = r.episode_rewards
        if len(reward_list) > 0:
            logger.info('Max Reward: {}'.format(np.max(reward_list)))
            logger.info("Average of last 500 rewards: {}".format(
                sum(reward_list[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(
                sum(reward_list[-100:]) / 100))
        logger.info('=' * 40)

    # Create runners
    threaded_runner = ThreadedRunner(agents,
                                     environments,
                                     repeat_actions=1,
                                     save_path=args.save,
                                     save_episodes=args.save_episodes)

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environments[0]))
    threaded_runner.run(summary_interval=100,
                        episode_finished=episode_finished,
                        summary_report=summary_report)
    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=threaded_runner.global_episode))

    for worker_environment in environments:
        worker_environment.close()
Example No. 27
app = App()
app.init_screen()
app.render()

with open(getenv('CONFIG', 'ppo.json')) as fp:
    config = json.load(fp)
max_episodes = config.pop('max_episodes', None)
max_timesteps = config.pop('max_timesteps', None)
max_episode_timesteps = config.pop('max_episode_timesteps')
network_spec = config.pop('network')

agent = Agent.from_spec(spec=config,
                        kwargs=dict(
                            states=dict(type='float',
                                        shape=(len(app.get_state()), )),
                            actions={
                                'accel': dict(type='int', num_actions=3),
                                'turn': dict(type='int', num_actions=3),
                            },
                            network=network_spec))

logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)

episode = 0

MAX_FRAMES_WITHOUT_REWARD = 200.

while True:
    episode += 1
    # XXX: make random switch
    app.checkpoints = app.checkpoints[::-1]
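    # The episode loop is truncated in this listing. As a purely hypothetical sketch of
    # how an agent built with a dict action spec ('accel', 'turn') is typically stepped
    # (app.step(actions) returning (reward, terminal) is an assumed method, not part of
    # the original App class), one episode body might look like:
    state = app.get_state()
    terminal = False
    frames_without_reward = 0
    while not terminal:
        # act() returns a dict keyed by 'accel' and 'turn', matching the action spec above
        actions = agent.act(states=state)
        reward, terminal = app.step(actions)  # assumed App method
        frames_without_reward = 0 if reward > 0 else frames_without_reward + 1
        if frames_without_reward >= MAX_FRAMES_WITHOUT_REWARD:
            terminal = True
        agent.observe(terminal=terminal, reward=reward)
        app.render()
        state = app.get_state()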
Example No. 28
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('run_number', help="Consecutive number of this run")
    parser.add_argument('gym_id', help="Id of the Gym environment")
    # parser.add_argument('-i', '--import-modules', help="Import module(s) required for environment")
    parser.add_argument('-a', '--agent', help="Agent configuration file")
    parser.add_argument('-n',
                        '--network',
                        default=None,
                        help="Network specification file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=None,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--timesteps',
                        type=int,
                        default=None,
                        help="Number of timesteps")
    parser.add_argument('-m',
                        '--max-episode-timesteps',
                        type=int,
                        default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d',
                        '--deterministic',
                        action='store_true',
                        default=False,
                        help="Choose actions deterministically")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-rl', '--reward-level', type=int, default=3)
    parser.add_argument('-rn', '--random-level', type=int, default=3)
    # parser.add_argument('--monitor', help="Save results to this directory")
    # parser.add_argument('--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    # parser.add_argument('--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('--visualize',
                        action='store_true',
                        default=False,
                        help="Enable OpenAI Gym's visualization")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")
    parser.add_argument('-te',
                        '--test',
                        action='store_true',
                        default=False,
                        help="Test agent without learning.")
    parser.add_argument('-sd',
                        '--seed',
                        type=int,
                        default=None,
                        help='Random seed for this trial')
    parser.add_argument('-rp',
                        '--repeat',
                        type=int,
                        default=6,
                        help='How many times to repeat an action')
    # parser.add_argument('-sl', '--sleep', type=float, default=None, help="Slow down simulation by sleeping for x seconds (fractions allowed).")
    # parser.add_argument('--job', type=str, default=None, help="For distributed mode: The job type of this agent.")
    # parser.add_argument('--task', type=int, default=0, help="For distributed mode: The task index of this agent.")

    args = parser.parse_args()

    # SET BASIC PARAMETERS

    random_seed = args.seed
    agent_save_period = args.save_episodes
    visualize_period = 10
    run_number = args.run_number

    load_agent = bool(args.load)
    agent_filename = args.load

    to_visualize = args.visualize

    # Set logging level
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # if args.import_modules is not None:
    #    for module in args.import_modules.split(','):
    #        importlib.import_module(name=module)

    environment = OpenAIGym(
        gym_id=args.gym_id,
        monitor=
        'C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\savemonitor\\',
        monitor_safe=False,
        monitor_video=0,
        visualize=False
        # True to visualize first run. Otherwise visualisation is set in episode_finished() method
    )

    # Set random seed for environment
    environment.gym.seed(random_seed)
    environment.gym.unwrapped.set_reward(args.reward_level)
    environment.gym.unwrapped.set_random(args.random_level)
    # Initialize Agent-Network-Model objects

    # with open('C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\examples\\configs\\ppo.json', 'r') as fp:
    #     agent = json.load(fp=fp)
    with open(
            'C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\examples\\configs\\{}'.
            format(args.agent), 'r') as fp:
        agent = json.load(fp=fp)

    # with open('C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\examples\\configs\\mlp2_network.json', 'r') as fp:
    #     network = json.load(fp=fp)
    with open(
            'C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\examples\\configs\\{}'.
            format(args.network), 'r') as fp:
        network = json.load(fp=fp)

    agent = Agent.from_spec(spec=agent,
                            kwargs=dict(states=environment.states,
                                        actions=environment.actions,
                                        network=network,
                                        random_seed=random_seed))
    if load_agent:
        agent.restore_model(
            directory=
            'C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\saves\\modelSave',
            file=agent_filename)

    runner = Runner(agent=agent,
                    environment=environment,
                    repeat_actions=args.repeat)

    report_frequently = True
    if report_frequently:
        report_episodes = 1
    else:
        report_episodes = 100

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environment))

    # Naming variables
    nNum = str(run_number).zfill(3)
    task = environment.gym.unwrapped.task
    if task == 'LIFT':
        nTask = 'L'
    else:
        nTask = 'P'
    nReward = environment.gym.unwrapped.reward_level
    nRandom = environment.gym.unwrapped.rand_level
    nSeed = str(random_seed).zfill(2)
    nAlg = 'PPO'

    nName = ("{}-{}{}{}-{}-{}".format(nNum, nTask, nReward, nRandom, nSeed,
                                      nAlg))

    def episode_finished(r, id_):

        # if r.episode == 1:
        # r.agent.restore_model('C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\saves\\modelSave')

        if r.episode % visualize_period == 0:
            if to_visualize:
                environment.visualize = True  # Set to true to visualize
        else:
            environment.visualize = False

        save_period = 20
        if r.episode % save_period == 0:
            with open(
                    'C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\saves\\{}.csv'
                    .format(nName), 'a+') as csv:
                for reward in r.episode_rewards[-save_period:]:
                    csv.write("{:2.2f}\n".format(reward))
                # print("Saving, yo!")

        if r.episode % report_episodes == 0:
            # steps_per_second = r.timestep / (time.time() - r.start_time)
            # logger.info("Finished episode {:d} after {:d} timesteps. Steps Per Second {:0.2f}".format(
            #    r.agent.episode, r.episode_timestep, steps_per_second
            # ))
            # logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            # logger.info("Average of last 500 rewards: {:0.2f}".
            #            format(sum(r.episode_rewards[-500:]) / min(500, len(r.episode_rewards))))
            # logger.info("Average of last 100 rewards: {:0.2f}".
            #            format(sum(r.episode_rewards[-100:]) / min(100, len(r.episode_rewards))))
            logger.info("{:6d},    {:+6.2f},     {:+6.2f}".format(
                r.agent.episode, r.episode_rewards[-1],
                sum(r.episode_rewards[-100:]) /
                min(100, len(r.episode_rewards))))

        if r.episode == 1 or (r.episode % agent_save_period == 0):
            logger.info("Saving agent to {} at episode {}".format(
                'C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\saves\\{}'
                .format(nName), r.episode))
            r.agent.save_model(
                directory=
                'C:\\Users\\genia\\Source\\Repos\\Box2dEnv\\Box2dEnv\\saves\\modelSave\\{}-{}'
                .format(nName, r.episode),
                append_timestep=False)

        return True

    runner.run(num_timesteps=args.timesteps,
               num_episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               episode_finished=episode_finished,
               testing=args.test,
               sleep=None)
    runner.close()

    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.agent.episode))
Example No. 29
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="Id of the Gym environment")
    parser.add_argument('-i',
                        '--import-modules',
                        help="Import module(s) required for environment")
    parser.add_argument('-a', '--agent', help="Agent configuration file")
    parser.add_argument('-n',
                        '--network',
                        default=None,
                        help="Network specification file")
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=None,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--timesteps',
                        type=int,
                        default=None,
                        help="Number of timesteps")
    parser.add_argument('-m',
                        '--max-episode-timesteps',
                        type=int,
                        default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d',
                        '--deterministic',
                        action='store_true',
                        default=False,
                        help="Choose actions deterministically")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se',
                        '--save-episodes',
                        type=int,
                        default=100,
                        help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('--monitor', help="Save results to this directory")
    parser.add_argument('--monitor-safe',
                        action='store_true',
                        default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('--monitor-video',
                        type=int,
                        default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('--visualize',
                        action='store_true',
                        default=False,
                        help="Enable OpenAI Gym's visualization")
    parser.add_argument('-D',
                        '--debug',
                        action='store_true',
                        default=False,
                        help="Show debug outputs")
    parser.add_argument('-te',
                        '--test',
                        action='store_true',
                        default=False,
                        help="Test agent without learning.")
    parser.add_argument(
        '-sl',
        '--sleep',
        type=float,
        default=None,
        help=
        "Slow down simulation by sleeping for x seconds (fractions allowed).")
    parser.add_argument(
        '--job',
        type=str,
        default=None,
        help="For distributed mode: The job type of this agent.")
    parser.add_argument(
        '--task',
        type=int,
        default=0,
        help="For distributed mode: The task index of this agent.")

    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    logger = logging.getLogger(__file__)
    logger.setLevel(logging.INFO)

    if args.import_modules is not None:
        for module in args.import_modules.split(','):
            importlib.import_module(name=module)

    environment = OpenAIGym(gym_id=args.gym_id,
                            monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video,
                            visualize=args.visualize)

    if args.agent is not None:
        with open(args.agent, 'r') as fp:
            agent = json.load(fp=fp)
    else:
        raise TensorForceError("No agent configuration provided.")

    if args.network is not None:
        with open(args.network, 'r') as fp:
            network = json.load(fp=fp)
    else:
        network = None
        logger.info("No network configuration provided.")

    agent = Agent.from_spec(spec=agent,
                            kwargs=dict(
                                states=environment.states,
                                actions=environment.actions,
                                network=network,
                            ))

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError(
                "Could not load agent from {}: No such directory.".format(
                    load_dir))
        agent.restore_model(args.load)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError(
                    "Cannot save agent to dir {}".format(save_dir))

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent)

    runner = Runner(agent=agent, environment=environment, repeat_actions=1)

    if args.debug:  # TODO: Timestep-based reporting
        report_episodes = 1
    else:
        report_episodes = 100

    logger.info("Starting {agent} for Environment '{env}'".format(
        agent=agent, env=environment))

    def episode_finished(r, id_):
        if r.episode % report_episodes == 0:
            steps_per_second = r.timestep / (time.time() - r.start_time)
            logger.info(
                "Finished episode {:d} after {:d} timesteps. Steps Per Second {:0.2f}"
                .format(r.agent.episode, r.episode_timestep, steps_per_second))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {:0.2f}".format(
                sum(r.episode_rewards[-500:]) /
                min(500, len(r.episode_rewards))))
            logger.info("Average of last 100 rewards: {:0.2f}".format(
                sum(r.episode_rewards[-100:]) /
                min(100, len(r.episode_rewards))))
        if args.save and args.save_episodes is not None and not r.episode % args.save_episodes:
            logger.info("Saving agent to {}".format(args.save))
            r.agent.save_model(args.save)

        return True

    runner.run(num_timesteps=args.timesteps,
               num_episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               episode_finished=episode_finished,
               testing=args.test,
               sleep=args.sleep)
    runner.close()

    logger.info("Learning finished. Total episodes: {ep}".format(
        ep=runner.agent.episode))
Example No. 30
def main():
    parser = argparse.ArgumentParser()
    # Gym arguments
    parser.add_argument('-g', '--gym', help="Gym environment id")
    parser.add_argument('-i',
                        '--import-modules',
                        help="Import module(s) required for gym environment")
    parser.add_argument('--monitor',
                        type=str,
                        default=None,
                        help="Save results to this directory")
    parser.add_argument('--monitor-safe',
                        action='store_true',
                        default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('--monitor-video',
                        type=int,
                        default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('--visualize',
                        action='store_true',
                        default=False,
                        help="Enable OpenAI Gym's visualization")
    # Agent arguments
    parser.add_argument('-a', '--agent', help="Agent configuration file")
    parser.add_argument('-n',
                        '--network',
                        default=None,
                        help="Network specification file")
    # Runner arguments
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=None,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--timesteps',
                        type=int,
                        default=None,
                        help="Number of timesteps")
    parser.add_argument('-m',
                        '--max-episode-timesteps',
                        type=int,
                        default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d',
                        '--deterministic',
                        action='store_true',
                        default=False,
                        help="Choose actions deterministically")
    args = parser.parse_args()

    if args.import_modules is not None:
        for module in args.import_modules.split(','):
            importlib.import_module(name=module)

    environment = OpenAIGym(gym_id=args.gym,
                            monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video,
                            visualize=args.visualize)

    agent = Agent.from_spec(spec=args.agent,
                            states=environment.states(),
                            actions=environment.actions(),
                            network=args.network)

    runner = Runner(agent=agent, environment=environment)

    def callback(r):
        if r.episode % 100 == 0:
            print("================================================\n"
                  "Average secs/episode over 100 episodes: {time:0.2f}\n"
                  "Average steps/sec over 100 episodes:    {timestep:0.2f}\n"
                  "Average reward over 100 episodes:       {reward100:0.2f}\n"
                  "Average reward over 500 episodes:       {reward500:0.2f}".
                  format(time=(sum(r.episode_times[-100:]) / 100.0),
                         timestep=(sum(r.episode_timesteps[-100:]) /
                                   sum(r.episode_times[-100:])),
                         reward100=(sum(r.episode_rewards[-100:]) /
                                    min(100.0, r.episode)),
                         reward500=(sum(r.episode_rewards[-500:]) /
                                    min(500.0, r.episode))))
        return True

    runner.run(num_timesteps=args.timesteps,
               num_episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               callback=callback)

    runner.close()