Example #1
    def test_convergence_sgd_one_freeze(self):
        freeze_interval = 500
        net = q_network.DeepQLearner(self.mdp.num_states, 1,
                                     self.mdp.num_actions, 1, self.discount,
                                     self.learning_rate, 0, 0, freeze_interval,
                                     1, 'linear', 'sgd', 1.0)

        self.train(net, freeze_interval * 2)

        numpy.testing.assert_almost_equal(
            self.all_q_vals(net), [[.7, 0], [.35, .5], [0, 1.0], [0., 0.]], 3)
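The train and all_q_vals helpers used above belong to the test fixture and are not shown. A minimal sketch of what they might look like, assuming the network exposes a q_vals(state) method and the toy MDP exposes states, actions, num_actions and act(state, action_index) as in the calls above (everything beyond those names is illustrative):

    # Hypothetical fixture helpers -- they mirror the calls made in the tests,
    # not the actual implementation.
    def train(self, net, steps):
        state = self.mdp.states[0]
        for _ in range(steps):
            action_index = numpy.random.randint(self.mdp.num_actions)
            reward, next_state, terminal = self.mdp.act(state, action_index)
            net.train(state, self.mdp.actions[action_index], reward,
                      next_state, terminal)
            state = self.mdp.states[0] if terminal else next_state

    def all_q_vals(self, net):
        # One row of per-action Q-values for each MDP state.
        return [net.q_vals(s) for s in self.mdp.states]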
Example #2
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = Snake(display=True)

    num_actions = ale.nactions

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    experiment.run()
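Several of these examples open the pickled network with a bare open(...) and never close the handle. Where that matters, a small loader along these lines keeps the file handling contained (the helper name is illustrative; binary mode is the safe default for pickles written with protocol >= 1):

import cPickle

def load_pickled_network(path):
    # The with-block closes the handle on all paths, including exceptions.
    with open(path, 'rb') as handle:
        return cPickle.load(handle)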
Example #3
    def sarsa_init(self, args):

        random_seed = random.randint(0, 20)  # 0-20 inclusive (random.randint includes both endpoints)
        rng = np.random.RandomState(random_seed)

        #Check the presence of NN file for sarsa when in play mode
        if ((args.handle == 'play') and (args.nn_file is None)):
            raise Exception('Error: no SARSA NN file to load')

        if args.nn_file is None:
            # New network creation
            self.logger.info("Creating network for SARSA")
            sarsa_network = q_network.DeepQLearner(
                args.screen_width,
                args.screen_height,
                self.actionsB,
                args.phi_length,  #num_frames
                args.discount,
                args.learning_rate,
                args.rms_decay,  #rho
                args.rms_epsilon,
                args.momentum_sarsa,
                1,  #clip_delta
                10000,  #freeze_interval
                args.batch_size,  #batch_size
                args.network_type,
                args.update_rule,
                # args.lambda_decay, #batch_accumulator
                'sum',
                rng)
        else:
            #Pretrained network loading
            #Mandatory for play mode, optional for training
            network_file_handle = open(args.nn_file, 'r')
            sarsa_network = cPickle.load(network_file_handle)

        self.logger.info("Creating SARSA agent")
        sarsa_agent_inst = SARSALambdaAgent(sarsa_network, args,
                                            args.epsilon_min,
                                            args.epsilon_decay,
                                            args.experiment_prefix,
                                            self.logger, rng)

        return sarsa_agent_inst
Example #4
    def test_updates_sgd_no_freeze(self):
        freeze_interval = -1
        net = q_network.DeepQLearner(self.mdp.num_states, 1,
                                     self.mdp.num_actions, 1, self.discount,
                                     self.learning_rate, 0, 0, freeze_interval,
                                     1, 'linear', 'sgd', 1.0)
        mdp = self.mdp

        # Depart left:
        state = mdp.states[0]
        action_index = 0
        reward, next_state, terminal = mdp.act(state, action_index)
        net.train(state, mdp.actions[action_index], reward, next_state,
                  mdp.terminal)

        numpy.testing.assert_almost_equal(self.all_q_vals(net),
                                          [[.07, 0], [0, 0], [0, 0], [0, 0]])

        # Depart right:
        state = mdp.states[-2]
        action_index = 1
        reward, next_state, terminal = mdp.act(state, action_index)
        net.train(state, mdp.actions[action_index], reward, next_state,
                  mdp.terminal)

        numpy.testing.assert_almost_equal(self.all_q_vals(net),
                                          [[.07, 0], [0, 0], [0, .1], [0, 0]])

        # Move into leftmost state
        state = mdp.states[1]
        action_index = 0
        reward, next_state, terminal = mdp.act(state, action_index)
        net.train(state, mdp.actions[action_index], reward, next_state,
                  mdp.terminal)

        numpy.testing.assert_almost_equal(
            self.all_q_vals(net), [[.07, 0], [0.0035, 0], [0, .1], [0, 0]])
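For reference, the expected values in this test are consistent with a single SGD step per transition on a linear Q-function that starts at zero, assuming learning_rate = 0.1 and discount = 0.5 (values consistent with the assertions above; the actual fixture values are defined in the test's setUp, which is not shown):

# Standalone arithmetic check, not DeepQLearner internals:
# one step of Q <- Q + lr * (target - Q), with Q initially 0.
learning_rate, discount = 0.1, 0.5   # assumed fixture values

def one_step(q_sa, reward, next_max_q, terminal):
    target = reward if terminal else reward + discount * next_max_q
    return q_sa + learning_rate * (target - q_sa)

print one_step(0.0, 0.7, 0.0, True)     # -> 0.07   (depart left)
print one_step(0.0, 1.0, 0.0, True)     # -> 0.1    (depart right)
print one_step(0.0, 0.0, 0.07, False)   # -> 0.0035 (move toward the left terminal)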
Example #5
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    # dump parameters for replication
    time_str = time.strftime("%Y-%m-%d_%H-%M_", time.localtime())
    exp_dir = time_str + parameters.experiment_prefix
    exp_dir = os.path.join("results", exp_dir)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)
    parameter_file = open(os.path.join(exp_dir, 'parameter.txt'), 'w', 0)
    parameter_file.write(str(parameters))
    parameter_file.flush()
    parameter_file.close()

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    avail_actions = ale.getMinimalActionSet()
    if parameters.train_all:
        num_actions = len(ale.getLegalActionSet())
    else:
        num_actions = len(avail_actions)

    print "avail_actions: " + str(avail_actions)
    print "num_actions: " + str(num_actions)

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, avail_actions,
            num_actions, parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng, parameters.train_all)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng, exp_dir, parameters.train_all)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    experiment.run()
Example #6
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    if parameters.nn_file is None:
        print 'building network...'
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        print 'loading network...'
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    print 'building agent...'
    if parameters.aws_secret_key and parameters.aws_access_key and parameters.s3_bucket:
        s3_utility = S3Utility(parameters.aws_access_key,
                               parameters.aws_secret_key, parameters.s3_bucket)
    else:
        s3_utility = None

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng, s3_utility)

    print 'building experiment...'
    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    print 'running experiment...'
    experiment.run()
Example #7
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ## Here !!!
    if isinstance(parameters.record_screen_dir, str):
        if len(parameters.record_screen_dir):
            ale.setString('record_screen_dir', parameters.record_screen_dir)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        with open(parameters.nn_file, 'r') as handle:
            network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng)

    ## Get the coach: let it have read/write access to the agent's databanks
    coach = None
    if parameters.nn_coach_file is not None:
        with open(parameters.nn_coach_file, 'r') as handle:
            network = cPickle.load(handle)
        coach = ale_coach.NeuralCoach(network, agent.get_training_dataset(),
                                      parameters.coach_epsilon, rng)

    experiment = ale_experiment.ALEExperiment(ale,
                                              agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng,
                                              coach=coach)

    experiment.run()
Example #8
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    #   if parameters.rom.endswith('.bin'):
    #       rom = parameters.rom
    #   else:
    #       rom = "%s.bin" % parameters.rom
    #   full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

#   ale = ale_python_interface.ALEInterface()
#   ale.setInt('random_seed', rng.randint(1000))

#   if parameters.display_screen:
#       import sys
#       if sys.platform == 'darwin':
#           import pygame
#           pygame.init()
#           ale.setBool('sound', False) # Sound doesn't work on OSX

#  ale.setBool('display_screen', parameters.display_screen)
# ale.setFloat('repeat_action_probability',
#               parameters.repeat_action_probability)

#  ale.loadROM(full_rom_path)

#  num_actions = len(ale.getMinimalActionSet())
    num_actions = 232
    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            26, 34, num_actions, parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = dialogue_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng)
    f = open('state_act_prob.pickle', 'rb')
    state_act_prob = pickle.load(f)
    f.close()
    (data, steps_per_epoch) = readData()
    experiment = dialogue_experiment.ALEExperiment(
        agent, data, state_act_prob, 26, 34, parameters.resize_method,
        parameters.test, parameters.epochs, steps_per_epoch,
        parameters.steps_per_test, parameters.frame_skip,
        parameters.death_ends_episode, parameters.max_start_nullops, rng)

    experiment.run()
Example #9
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(parameters.Seed)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd_data = 'deterministic'
        theano.config.dnn.conv.algo_bwd_filter = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    agent = None

    if not parameters.close2:
        print 'transition length is ', parameters.transition_length, 'transition range is', parameters.transition_range
    if parameters.method == 'ot':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(
                defaults.RESIZED_WIDTH,
                defaults.RESIZED_HEIGHT,
                num_actions,
                parameters.phi_length,
                parameters.discount,
                parameters.learning_rate,
                parameters.rms_decay,
                parameters.rms_epsilon,
                parameters.momentum,
                parameters.clip_delta,
                parameters.freeze_interval,
                parameters.batch_size,
                parameters.network_type,
                parameters.update_rule,
                parameters.batch_accumulator,
                rng,
                double=parameters.double_dqn,
                transition_length=parameters.transition_length)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        agent = ale_agents.OptimalityTightening(
            network, parameters.epsilon_start, parameters.epsilon_min,
            parameters.epsilon_decay, parameters.replay_memory_size,
            parameters.experiment_prefix, parameters.update_frequency,
            parameters.replay_start_size, rng, parameters.transition_length,
            parameters.transition_range, parameters.penalty_method,
            parameters.weight_min, parameters.weight_max,
            parameters.annealing_len, parameters.beta, parameters.two_train,
            parameters.late2, parameters.close2, parameters.verbose,
            parameters.double_dqn, parameters.save_pkl)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng, parameters.flickering_buffer_size)

    experiment.run()
Example #10
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    agent = None

    if parameters.method == 'ec_dqn':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_ec=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

        agent = ale_agents.EC_DQN(network,
                                  qec_table,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  parameters.ec_discount,
                                  num_actions,
                                  parameters.ec_testing,
                                  rng)

    if parameters.method == 'dqn_episodic_memory1':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_episodic_mem=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

        agent = ale_agents.NeuralNetworkEpisodicMemory1(network,
                                                        qec_table,
                                                        parameters.epsilon_start,
                                                        parameters.epsilon_min,
                                                        parameters.epsilon_decay,
                                                        parameters.replay_memory_size,
                                                        parameters.experiment_prefix,
                                                        parameters.replay_start_size,
                                                        parameters.update_frequency,
                                                        parameters.ec_discount,
                                                        num_actions,
                                                        parameters.ec_testing,
                                                        rng)
    if parameters.method == 'dqn_episodic_memory2':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_episodic_mem=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)
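
        # Note: unlike the other method branches, 'dqn_episodic_memory2' never
        # constructs an agent here, so `agent` remains None.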

    if parameters.method == 'dqn_episodic_memory3':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_episodic_mem=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.LshHash(parameters.state_dimension,
                                             defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                             parameters.buffer_size,
                                             rng)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

        agent = ale_agents.NeuralNetworkEpisodicMemory3(network,
                                                        qec_table,
                                                        parameters.epsilon_start,
                                                        parameters.epsilon_min,
                                                        parameters.epsilon_decay,
                                                        parameters.replay_memory_size,
                                                        parameters.experiment_prefix,
                                                        parameters.replay_start_size,
                                                        parameters.update_frequency,
                                                        parameters.ec_discount,
                                                        num_actions,
                                                        parameters.ec_testing,
                                                        rng)

    if parameters.method == 'dqn':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        agent = ale_agents.NeuralAgent(network,
                                       parameters.epsilon_start,
                                       parameters.epsilon_min,
                                       parameters.epsilon_decay,
                                       parameters.replay_memory_size,
                                       parameters.experiment_prefix,
                                       parameters.replay_start_size,
                                       parameters.update_frequency,
                                       rng)

    if parameters.method == 'episodic_control':
            if parameters.qec_table is None:
                qec_table = EC_functions.QECTable(parameters.knn,
                                                  parameters.state_dimension,
                                                  parameters.projection_type,
                                                  defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                                  parameters.buffer_size,
                                                  num_actions,
                                                  rng,
                                                  parameters.rebuild_knn_frequency)
            else:
                handle = open(parameters.qec_table, 'r')
                qec_table = cPickle.load(handle)

            agent = ale_agents.EpisodicControl(qec_table,
                                               parameters.ec_discount,
                                               num_actions,
                                               parameters.epsilon_start,
                                               parameters.epsilon_min,
                                               parameters.epsilon_decay,
                                               parameters.experiment_prefix,
                                               parameters.ec_testing,
                                               rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng)

    experiment.run()
Example #11
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    # Load VAE file
    with open(parameters.vae_aux_file, 'r') as f:
        aux_data = pickle.load(f)
    params = aux_data['params']

    with tf.device(None):  #"/gpu:0"):

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        # sess = tf.Session(config=config)

        # config = tf.ConfigProto(
        #     device_count={'GPU': 0}
        # )

        sess = tf.Session(config=config)

        VAE = vae.vae(params)
        VAE._create_network_()

        try:
            sess.run(tf.global_variables_initializer())
        except AttributeError:
            sess.run(tf.initialize_all_variables())

        saver = tf.train.Saver()
        chkpt = tf.train.get_checkpoint_state(parameters.vae_file)

        if chkpt and chkpt.model_checkpoint_path:
            saver.restore(sess, chkpt.model_checkpoint_path)
        else:
            print 'No checkpoint found'

    import theano
    import ale_experiment
    import ale_agent
    import q_network
    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'
    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            VAE.z_size, 1, num_actions, parameters.phi_length,
            parameters.discount, parameters.learning_rate,
            parameters.rms_decay, parameters.rms_epsilon, parameters.momentum,
            parameters.clip_delta, parameters.freeze_interval,
            parameters.batch_size, parameters.network_type,
            parameters.update_rule, parameters.batch_accumulator, rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, VAE.X_size[1], VAE.X_size[0], parameters.resize_method,
        parameters.epochs, parameters.steps_per_epoch,
        parameters.steps_per_test, parameters.frame_skip,
        parameters.death_ends_episode, parameters.max_start_nullops, rng, VAE,
        sess)

    experiment.run()
Example #12
def launch(defaults):

    timeseries = import_timeseries.timeseries(defaults.trainfile,
                                              defaults.testfile)
    ale = ale_action.ale(timeseries, defaults.steps_per_epoch)

    #re-run last script ctrl+F6
    #test
    #    ale.reset_game()
    #    print(ale.getCurrentState())
    #    print(ale.act(2))
    #    print(ale.priceOrder)
    #    #print("balance: "+ str(ale.stateToCat()[-1]))
    #    print(ale.getCurrentState())
    #    print(ale.act(0))
    #    #print("balance: "+ str(ale.stateToCat()[-1]))
    #    print(ale.getCurrentState())
    #    print(ale.act(0))
    #    #print("balance: "+ str(ale.stateToCat()[-1]))
    #    print(ale.getCurrentState())
    #    print(ale.act(0))
    #
    #    print(ale.getCurrentState())
    #    print(ale.act(0))
    #    #print("balance: "+ str(ale.stateToCat()[-1]))
    #    print(ale.getCurrentState())
    #    print(ale.act(0))
    #    #print("balance: "+ str(ale.stateToCat()[-1]))
    #    print(ale.getCurrentState())
    #    print(ale.act(0))
    #   #print("balance: "+ str(ale.stateToCat()[-1]))
    #    print(ale.timeseries.train[ale.currentPosTime])
    #    print(ale.getCurrentState())
    #    print(ale.act(3))
    #
    #    assert (0==1)
    #
    nn_file = None
    if nn_file is None:
        network = q_network.DeepQLearner(
            defaults.WIDTH,
            defaults.HEIGHT,
            ale.getActionCount(),
            defaults.phi_length,  # num_frames - roughly a history over time
            defaults.discount,
            defaults.learning_rate,
            defaults.rms_decay,
            defaults.rms_epsilon,
            defaults.momentum,
            defaults.CLIP_DELTA,
            defaults.FREEZE_INTERVAL,
            defaults.batch_size,
            defaults.update_rule,
            defaults.batch_accumulator,
            ale.getInputCount())
    else:
        handle = open(nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network, defaults.epsilon_start,
                                  defaults.epsilon_min, defaults.epsilon_decay,
                                  defaults.replay_memory_size,
                                  defaults.experiment_prefix,
                                  defaults.replay_start_size,
                                  defaults.update_frequency)

    experiment = ale_experiment.ALEExperiment(ale, agent, defaults.WIDTH,
                                              defaults.HEIGHT, defaults.epochs,
                                              defaults.steps_per_epoch,
                                              defaults.steps_per_test,
                                              defaults.death_ends_episode)

    experiment.run()
Example #13
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    # Load VAE file
    with open(parameters.vae_aux_file, 'r') as f:
        aux_data = pickle.load(f)
    params = aux_data['params']

    with tf.device(None):  #"/gpu:0"):

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        # sess = tf.Session(config=config)

        sess = tf.Session(config=config)

        VAE = vae.vae(params)
        VAE._create_network_()

        try:
            sess.run(tf.global_variables_initializer())
        except AttributeError:
            sess.run(tf.initialize_all_variables())

        saver = tf.train.Saver()

    import theano
    import ale_experiment
    import ale_agent
    import q_network
    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'
    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng, VAE, sess, defaults.VAE_REQ_STEPS,
        defaults.VAE_STORAGE_SIZE)
    time_str = time.strftime("%m-%d-%H-%M", time.gmtime())
    vae_save_path = '%s/%s_beta%f_z%d' % (defaults.VAE_OUT_PREFIX,
                                          rom.split('.')[0], params['beta'],
                                          params['z_size'])
    os.system('mkdir -p %s' % (vae_save_path))
    experiment.run()
    ckpt_path = '%s/%s_%s' % (vae_save_path, rom.split('.')[0], time_str)
    print ckpt_path
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    saver.save(sess, '%s/checkpoint.ckpt' % (ckpt_path))
Example #14
def launch(args, defaults, description, ALE=None):
    """
    Execute a complete training run.
    """

    parameters = process_args(args, defaults, description)

    if ALE is None:
        if parameters.rom.endswith('.bin'):
            rom = parameters.rom
        else:
            rom = "%s.bin" % parameters.rom
        full_rom_path = os.path.abspath(
            os.path.join(defaults.BASE_ROM_PATH, rom))

        ale = custom_ale_interface.CustomALEInterface(
            rom=parameters.rom, display_screen=parameters.display_screen)
    else:
        ale = ALE  # assume ALE already have rom inside

    num_actions = len(ale.getLegalActionSet())

    # 1. first goes run control from user
    if parameters.nn_file is not None:
        nn_file = os.path.abspath(parameters.nn_file)
        logging.info('loading network from parameters: ' + nn_file)
        with open(nn_file, 'r') as handle:
            network = cPickle.load(handle)
            logging.info('network loaded')
            # nasty bug with discount parameter, sometimes it is not saved
            if not network.__dict__.get('discount', None):
                network.discount = parameters.discount

    # 2. second goes defaults
    elif defaults.__dict__.get(
            'NN_FILE',
            None) is not None:  # do we have NN_FILE in defaults class params?
        nn_file = os.path.abspath(defaults.NN_FILE)
        logging.info('loading network from defaults: ' + nn_file)
        with open(nn_file, 'r') as handle:
            network = cPickle.load(handle)
            logging.info('network loaded')
            # nasty bug with discount parameter, sometimes it is not saved
            if not network.__dict__.get('discount', None):
                network.discount = parameters.discount

    # 3. training from scratch otherwise
    else:
        logging.info('generating network from scratch')
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency,
        'experiments')  # experiment folder to store results

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.death_ends_episode)

    experiment.run()
Example #15
def start_training(params):
    """
    Initialize rom, game, agent, network and start a training run
    """

    # CREATE A FOLDER TO HOLD RESULTS

    exp_pref = "../results/" + params.EXPERIMENT_PREFIX
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    exp_dir = exp_pref + time_str + \
                   "{}".format(params.LEARNING_RATE).replace(".", "p") + "_" \
                   + "{}".format(params.DISCOUNT).replace(".", "p")

    try:
        os.stat(exp_dir)
    except OSError:
        os.makedirs(exp_dir)

    logger = logging.getLogger("DeepLogger")
    logger.setLevel(logging.INFO)

    # Logging filehandler
    #fh = logging.FileHandler(exp_dir + "/log.log")
    # Rotate file when filesize is 5 mb
    fh = RotatingFileHandler(exp_dir + "/log.log",
                             maxBytes=5000000,
                             backupCount=100)

    fh.setLevel(logging.INFO)

    # Console filehandler
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)

    formatter = logging.Formatter('%(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)

    logger.addHandler(fh)

    # Prevent nohup from producing large log file, logging to file is handled internally
    # logger.addHandler(ch)

    log_params(logger, params)

    #logging.basicConfig(level=logging.INFO, filename=exp_dir + "/log.log")

    if params.DETERMINISTIC:
        rng = np.random.RandomState(12345)
    else:
        rng = np.random.RandomState()

    if params.CUDNN_DETERMINISTIC:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    # Init ale
    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', 123)
    ale.setBool('display_screen', params.DISPLAY_SCREEN)
    ale.setFloat('repeat_action_probability', params.REPEAT_ACTION_PROBABILITY)
    full_rom_path = os.path.join(params.ROM_PATH, params.ROM_NAME)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())

    print "Legal actions: ", num_actions
    print ale.getMinimalActionSet()

    # Instantiate network
    logger.info("Setting up network...")
    network = None  # Be able to continue training from a network or watch a network play
    if (params.NETWORK_PICKLE_FILE is None):
        logger.info("Initializing a new random network...")
        network = q_network.DeepQLearner(
            params.RESIZED_WIDTH, params.RESIZED_HEIGHT, num_actions,
            params.PHI_LENGTH, params.DISCOUNT, params.LEARNING_RATE,
            params.RMS_DECAY, params.RMS_EPSILON, params.MOMENTUM,
            params.CLIP_DELTA, params.FREEZE_INTERVAL, params.BATCH_SIZE,
            params.NETWORK_TYPE, params.UPDATE_RULE, params.BATCH_ACCUMULATOR,
            rng)
    else:
        logger.info("Loading network instance from file...")
        handle = open(params.NETWORK_PICKLE_FILE, 'r')
        network = cPickle.load(handle)

    # Only used when getting a random network
    if params.RANDOM_NETWORK_PICKLE:
        import sys
        sys.setrecursionlimit(10000)
        result_net_file = open(params.EXPERIMENT_PREFIX + '.pkl', 'w')
        print "File opened"
        cPickle.dump(network, result_net_file, -1)
        print "Pickle dumped"
        result_net_file.close()
        sys.exit(0)

    # Instatiate agent
    logger.info("Setting up agent...")
    agent = ale_agent.NeuralAgent(network, params.EPSILON_START,
                                  params.EPSILON_MIN, params.EPSILON_DECAY,
                                  params.REPLAY_MEMORY_SIZE, exp_dir,
                                  params.REPLAY_START_SIZE,
                                  params.UPDATE_FREQUENCY, rng)

    # Instantiate experient
    logger.info("Setting up experiment...")
    experiment = ale_experiment.ALEExperiment(
        ale, agent, params.RESIZED_WIDTH, params.RESIZED_HEIGHT,
        params.RESIZE_METHOD, params.EPOCHS, params.STEPS_PER_EPOCH,
        params.STEPS_PER_TEST, params.FRAME_SKIP, params.DEATH_ENDS_EPISODE,
        params.MAX_START_NULLOPS, rng)

    # Run experiment
    logger.info("Running experiment...")
    experiment.run()
Example #16
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    time_str = time.strftime("_%m-%d-%H-%M_", time.localtime())
    logging.basicConfig(filename='log' + time_str + '.txt', level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())

    ######################################################
    # Daniel: This is where I insert human-guided stuff. #
    ######################################################

    # Logic to deal with loading a separate network trained on human data.
    # Must also address mapping from human net (0,1,2,...) to ALE.
    # I know that, for Breakout, my {0,1,2} correspond to {NOOP,LEFT,RIGHT}.
    # But how should these get mapped to ALE actions? I know 0=noop, 1=fire.
    # Keep in mind that there's a SECOND mapping that happens after this!
    map_action_index = None
    human_net = None
    human_experience_replay = None

    if parameters.use_human_net:
        if (rom == 'breakout' or rom == 'breakout.bin'):
            # This maps the action indices from the net (0,1,2,...) into a
            # **second** mapping [0 1 3 4], which is game-independent, so the
            # main work is to set map_action_index.
            # Thus, 0 ==> 0 ==> 0 (NOOP)
            # Thus, 1 ==> 3 ==> 4 (LEFT)
            # Thus, 2 ==> 2 ==> 3 (RIGHT)
            # (The net doesn't use FIRE.)
            map_action_index = {0: 0, 1: 3, 2: 2}
        elif (rom == 'space_invaders' or rom == 'space_invaders.bin'):
            # Second mapping is [0 1 3 4 11 12] E.g., 4 is FLEFT in my data,
            # needs to be mapped to index 5 so it results in '12'.
            map_action_index = {0: 0, 1: 1, 2: 3, 3: 2, 4: 5, 5: 4}
        else:
            raise ValueError("rom={} doesn't have action mapping".format(rom))

        # Let's make the human net; #actions = len(map_action_index).
        human_net = human_q_net.HumanQNetwork(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
            len(map_action_index), parameters.phi_length,
            parameters.batch_size, parameters.network_type,
            parameters.human_net_path, map_action_index)

    if parameters.use_human_experience_replay:
        if (rom == 'breakout' or rom == 'breakout.bin'):
            human_experience_replay = np.load(
                parameters.human_experience_replay_path)
        else:
            raise ValueError("rom={} doesn't have xp replay".format(rom))

    ###########################
    # Daniel: Back to normal. #
    ###########################

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        handle = open(parameters.nn_file, 'rb')
        network = cPickle.load(handle)
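
    # The agent below gets the optional human net and human replay buffer in
    # addition to the usual DQN arguments; both stay None when the
    # corresponding options are off.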

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng, parameters.epochs,
        parameters.use_human_net, parameters.use_human_experience_replay,
        human_net, human_experience_replay)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    experiment.run()
Example #17
0
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

#    if parameters.rom.endswith('.bin'):
#        rom = parameters.rom
#    else:
#        rom = "%s.bin" % parameters.rom
    rom = parameters.rom
    core = parameters.core
    if core == 'snes':
        core = 'snes9x2010_libretro.so'
    elif core == 'atari':
        core = 'stella_libretro.so'
    else:
        raise ValueError("--core must be atari|snes")

    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)
    full_core_path = os.path.join(defaults.BASE_CORE_PATH, core)
    two_players = False
    if parameters.nn_file2 is not None:
        two_players = True

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = rle_python_interface.RLEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    if two_players:
        ale.setBool('two_players', True)

    ale.loadROM(full_rom_path, full_core_path)

    num_actions = len(ale.getMinimalActionSet())

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        handle = open(parameters.nn_file, 'rb')
        network = cPickle.load(handle)
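
    # A second agent is built only when nn_file2 is given; the literal value
    # 'default' constructs a fresh network for player two, while anything else
    # is treated as a path to a pickled network to load.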

    agent2 = None

    if two_players:
        if parameters.nn_file2 == 'default':
            network2 = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng)
        else:
            handle2 = open(parameters.nn_file2, 'rb')
            network2 = cPickle.load(handle2)

        agent2 = ale_agent.NeuralAgent(network2,
                                      parameters.epsilon_start,
                                      parameters.epsilon_min,
                                      parameters.epsilon_decay,
                                      parameters.replay_memory_size,
                                      parameters.experiment_prefix,
                                      parameters.replay_start_size,
                                      parameters.update_frequency,
                                      rng,
                                      'b')
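        # (The extra 'b' argument presumably tags this as player two's agent;
        # the single-player agent created below omits it.)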

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng,
                                              agent2)

    experiment.run()
Example #18
0
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)
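
    # The game mode is expected to be numeric; fall back to mode 1 if it
    # cannot be parsed as an integer.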

    try:
        mode = int(parameters.mode)
    except ValueError:
        mode = 1

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    if parameters.experiment_directory:
        experiment_directory = parameters.experiment_directory
    else:
        time_str = time.strftime("_%Y-%m-%d-%H-%M")
        experiment_directory = parameters.experiment_prefix + time_str \
                                   + '_mode_' + str(mode)

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
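
    # Optionally record the screen (and, off macOS, the sound) into a 'video'
    # subdirectory of the experiment directory.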

    if parameters.record_video:
        video_directory = os.path.join(experiment_directory, 'video')
        if not os.path.isdir(video_directory):
            os.makedirs(video_directory)

        ale.setString('record_screen_dir', video_directory)

        if sys.platform != 'darwin':
            ale.setBool('sound', True)
            ale.setString("record_sound_filename",
                          os.path.join(video_directory, "sound.wav"))
            # "We set fragsize to 64 to ensure proper sound sync"
            # (that's what videoRecordingExample.cpp in ALE says; I don't
            # really know what it means.)
            ale.setInt("fragsize", 64)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    ale.setMode(mode)
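
    # Note that this variant's DeepQLearner takes an extra use_double flag
    # (between freeze_interval and batch_size), presumably toggling Double DQN.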

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.use_double,
            parameters.batch_size, parameters.network_type,
            parameters.update_rule, parameters.batch_accumulator, rng)
    else:
        handle = open(parameters.nn_file, 'rb')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  experiment_directory,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng,
                                  recording=parameters.recording)

    experiment = ale_experiment.ALEExperiment(
        ale,
        agent,
        defaults.RESIZED_WIDTH,
        defaults.RESIZED_HEIGHT,
        parameters.resize_method,
        parameters.epochs,
        parameters.steps_per_epoch,
        parameters.steps_per_test,
        parameters.frame_skip,
        parameters.death_ends_episode,
        parameters.max_start_nullops,
        rng,
        length_in_episodes=parameters.episodes)

    experiment.run()
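
Each of the launch() variants above is meant to be driven by a small run script: process_args(args, defaults, description) turns the command line plus a defaults container into the parameters object. A minimal driver sketch follows, assuming it lives in the same module as one of the launch() definitions and assuming a hypothetical Defaults class whose attribute names match the ones referenced above; the real run scripts and default values will differ.

import sys


class Defaults:
    # Hypothetical values; a real defaults container would also carry the
    # per-run hyperparameters consumed by process_args().
    BASE_ROM_PATH = "roms/"
    BASE_CORE_PATH = "cores/"
    RESIZED_WIDTH = 84
    RESIZED_HEIGHT = 84


if __name__ == '__main__':
    # Pass the raw command-line arguments, the defaults container, and a
    # description string, matching the launch(args, defaults, description)
    # signature used throughout the examples.
    launch(sys.argv[1:], Defaults, __doc__)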