Example #1
    def __init__(self,
                 rand_seed,
                 rom,
                 display=False,
                 frame_skip=4,
                 no_op_max=7):
        self.ale = ale_python_interface.ALEInterface()

        self.ale.setInt(b'random_seed', rand_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', frame_skip)

        self._no_op_max = no_op_max
        if display:
            self._setup_display()

        self.ale.loadROM(rom.encode('ascii'))

        # collect minimal action set
        self.real_actions = self.ale.getMinimalActionSet()

        # height=210, width=160
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)

        self.reset()
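
For reference, a minimal sketch of driving ALEInterface directly with the same settings the wrapper above applies; the ROM path and the random-action policy are illustrative assumptions, not part of the original example.

import numpy as np
import ale_python_interface

ale = ale_python_interface.ALEInterface()
ale.setInt(b'random_seed', 0)
ale.setFloat(b'repeat_action_probability', 0.0)
ale.setInt(b'frame_skip', 4)
ale.loadROM(b'roms/pong.bin')  # assumed ROM path

actions = ale.getMinimalActionSet()
screen = np.empty((210, 160, 1), dtype=np.uint8)  # height=210, width=160

total_reward = 0
while not ale.game_over():
    a = actions[np.random.randint(len(actions))]  # random policy, illustration only
    total_reward += ale.act(a)                     # act() returns the per-step reward
    ale.getScreenGrayscale(screen)                 # fills the preallocated buffer in place
ale.reset_game()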
Example #2
    def __init__(self, opt):
        self.envType = opt.environment
        self.show = opt.show
        self.delay = opt.delay
        self.width = opt.width
        self.height = opt.height
        self.noopMax = opt.noopMax

        if self.show is True:
            cv2.namedWindow("show")
        if self.envType == "ALE":
            import ale_python_interface
            self.env = ale_python_interface.ALEInterface()
            if opt.randomSeed is None:
                self.env.setInt('random_seed', 0)
            else:
                self.env.setInt('random_seed', opt.randomSeed)
            self.env.setInt("frame_skip", opt.frameSkip)
            self.env.setBool('color_averaging', opt.colorAverageing)
            self.env.setInt('phosphor_blend_ratio', opt.phosphorBlendRatio)
            self.env.setInt('max_num_frames_per_episode',
                            opt.maxNumFramesPerEpisode)
            # print self.env.getInt('phosphor_blend_ratio')
            self.env.setBool('sound', False)
            self.env.setBool('display_screen', False)
            self.env.setFloat("repeat_action_probability", 0.0)
            self.env.loadROM(opt.pathRom)
            self.legal_actions = self.env.getMinimalActionSet()
            self.n_actions = len(self.legal_actions)
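
Both wrappers above keep a no-op cap (no_op_max / noopMax) but do not show how it is used. A hedged sketch of the usual pattern, assuming ALE action 0 is NOOP and the function name is illustrative: take a random number of no-op steps right after reset so episodes do not all start from the identical frame.

import random

def reset_with_noops(env, noop_max):
    # Sketch only: randomized no-op start on an ALEInterface instance.
    env.reset_game()
    for _ in range(random.randint(0, noop_max)):
        env.act(0)                 # assumes action 0 is NOOP
        if env.game_over():        # practically never triggers during no-ops
            env.reset_game()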
Example #3
def main():
    ale = ale_python_interface.ALEInterface()
    ale.loadROM('aleroms/breakout.bin')
    actions = ale.getMinimalActionSet()

    params = shared.bindNew()
    print 'building main model...'
    model = make_model(len(actions))
    train, get_pol, get_v, get_grads, update_from_grads, lr, beta = make_funcs(
        model, params)

    global_params = [mparray(p.get_value()) for p in params]
    for gp, p in zip(global_params, params):
        p.set_value(gp[3], borrow=True)

    nthreads = 4

    thread_grads = [[mparray_zero(p.get_value().shape) for p in params]
                    for i in range(nthreads)]
    thread_counters = mparray_zero((nthreads, ))
    counters = mpasnp(thread_counters)
    running = mparray_zero((1, ))
    running[3][0] += nthreads
    print 'starting threads'
    manager = mp.Manager()
    rewards = manager.list()
    for i in range(nthreads):
        p = mp.Process(target=thread_learner,
                       args=(i, global_params, thread_grads[i],
                             thread_counters, running, rewards))
        p.start()

    mp.Process(target=plot_stuff, args=(rewards, running)).start()

    try:
        last_draw = time.time()
        upds = numpy.zeros(nthreads)
        cnts = numpy.zeros(nthreads)
        while running[3][0] > 0:
            for i in range(nthreads):
                if counters[i] > 0:
                    with thread_grads[i][0][0].get_lock():
                        grads = [g[3] / counters[i] for g in thread_grads[i]]
                        update_from_grads(*grads)
                        for g in thread_grads[i]:
                            g[3][:] *= 0
                        upds[i] += counters[i]
                        cnts[i] += 1
                        counters[i] = 0
            if time.time() - last_draw > 2:
                print 'updates:', numpy.mean(
                    upds / cnts), numpy.mean(upds), running[3]
                upds = numpy.zeros(nthreads)
                cnts = numpy.zeros(nthreads)
                last_draw = time.time()

    finally:
        running[3][0] = 0
Example #4
def initialize(pid, device, flags, comm, share_comm):
    message = 'initialize process: {:d} with GPU: {} game: {}'.format(
        pid, device, flags.rom)
    comm.send([-1, 'print', message], dest=flags.threads)
    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = device[-1]
    np.random.seed(flags.seed)
    tf.set_random_seed(flags.seed)
    try:
        import ale_python_interface
    except ImportError:
        import atari_py.ale_python_interface as ale_python_interface

    # initialize ALE environment
    if flags.rom.endswith('.bin'):
        rom = flags.rom
    else:
        rom = "%s.bin" % flags.rom
    full_rom_path = os.path.join(flags.roms_path, rom)
    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', flags.seed)
    ale.setBool('sound', False)
    ale.setBool('display_screen', False)
    ale.setFloat('repeat_action_probability', flags.repeat_action_probability)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())

    # adjust flags
    flags.num_actions = num_actions
    flags.logs_path = os.path.join(flags.logs_path,
                                   '#' + str(pid) + '_' + flags.rom)
    tf.gfile.MakeDirs(flags.logs_path)

    # print settings
    setting_file = open(os.path.join(flags.logs_path, 'flags.txt'), mode='w+')
    for key, item in flags.__flags.items():
        setting_file.write(key + ' : ' + str(item) + '\n')

    # initialize agent
    if flags.ot:
        network = neural_networks.OptimalityTighteningNetwork(
            pid, flags, device, share_comm)
    else:
        network = neural_networks.DeepQNetwork(pid, flags, device, share_comm)

    setting_file.write(network.nn_structure_file)
    setting_file.close()

    if flags.ot:
        agent = agents.OptimalityTigheningAgent(pid, network, flags, comm,
                                                share_comm)
    else:
        agent = agents.QLearning(pid, network, flags, comm, share_comm)
    interaction.Interaction(pid, ale, agent, flags, comm).start()
Example #5
def record_new(rom, output, frames, episodes, seed, snapshot_interval):
    rom_name = os.path.splitext(os.path.split(rom)[-1])[0]
    pygame.init()
    ale = ALE.ALEInterface()
    ale.setInt('random_seed', seed)
    ale.setFloat('repeat_action_probability', 0)
    ale.setBool('color_averaging', False)
    ale.setBool('display_screen', True)
    ale.loadROM(rom)
    demo = Demonstration(rom=rom_name, action_set=ale.getMinimalActionSet())
    record(ale, demo, output, frames, episodes, snapshot_interval)
Example #6
def resume(partial_demo, rom, frames, episodes, snapshot_interval):
    pygame.init()
    demo = Demonstration.load(partial_demo)
    ale = ALE.ALEInterface()
    ale.setFloat('repeat_action_probability', 0)
    ale.setBool('color_averaging', False)
    ale.setBool('display_screen', True)
    ale.loadROM(rom)
    # restore snapshot from original recording + begin new episode
    # n.b. needed to preserve state from the original recording, like the seed
    demo.reset_to_latest_snapshot(ale)
    ale.reset_game()
    record(ale, demo, partial_demo, frames, episodes, snapshot_interval)
Example #7
    def _setup_ale(self, rom, display_game_screen):
        '''
        Args:
            rom (str): path to Atari rom.
            display_game_screen (bool): true iff the Atari screen should be displayed.

        Summary:
            Initializes the Atari Learning Environment.
        '''
        # Create ALE Interface and set the seed.
        self.ale = ale_interface.ALEInterface()
        self.ale.setInt('random_seed', 123)

        # Setup visuals and sound depending on os.
        if display_game_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Load the ROM.
        self.rom = rom.replace(".bin", "")
        full_rom_path = os.path.dirname(
            os.path.realpath(__file__)) + "/roms/" + self.rom + ".bin"
        self.ale.loadROM(full_rom_path)

        # Grab game details.
        self.ram_size = self.ale.getRAMSize()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.actions = self.ale.getLegalActionSet()

        # Get Ram, initial image info.
        ram = numpy.zeros((self.ram_size), dtype=numpy.uint8)
        ram_state = self.ale.getRAM(ram)
        screen_data = (self.ale.getScreenGrayscale() if self.grayscale
                       else self.ale.getScreenRGB())

        # Make initial state.
        self.init_state = AtariState(screen_data,
                                     ram_state,
                                     objects_from_image=self.image_to_object)
Example #8
    def __init__(self, rom, outSize):
        self._ale = ALE.ALEInterface()
        self._ale.setInt("random_seed".encode(), int(time.time()))
        self._ale.setFloat("repeat_action_probability".encode(), 0)
        self._ale.setBool("color_averaging".encode(), True)
        self._ale.loadROM(rom.encode())

        d = self._ale.getScreenDims()

        ## The size of the screen in the form [height, width]
        self.screenSize = [d[1], d[0]]
        ## The size of the images returned by getScreen, in the form
        #  [height, width]
        self.outSize = outSize
        self._RAWScreen = np.empty([d[0] * d[1]], dtype=np.uint8)
        self._RAWScaled = np.empty(self.outSize, dtype=np.uint8)
        self._RGBScreen = np.empty([d[1], d[0], 3], dtype=np.uint8)

        t = time.localtime()
        self._creaTime  = str(t.tm_year) + "-" + str(t.tm_mon)  + \
                                           "-" + str(t.tm_mday) + \
                                           "-" + str(t.tm_hour) + \
                                           "." + str(t.tm_min)  + \
                                           "." + str(t.tm_sec)
Example #9
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    agent = None

    if parameters.use_episodic_control:
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(
                parameters.knn, parameters.state_dimension,
                parameters.projection_type,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size, num_actions, rng)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = pickle.load(handle)

        agent = EC_agent.EpisodicControl(qec_table, parameters.ec_discount,
                                         num_actions, parameters.epsilon_start,
                                         parameters.epsilon_min,
                                         parameters.epsilon_decay,
                                         parameters.experiment_prefix, rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    experiment.run()
Example #10
def start_training(params):
    """
    Initialize rom, game, agent, network and start a training run
    """

    # CREATE A FOLDER TO HOLD RESULTS

    exp_pref = "../results/" + params.EXPERIMENT_PREFIX
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    exp_dir = exp_pref + time_str + \
                   "{}".format(params.LEARNING_RATE).replace(".", "p") + "_" \
                   + "{}".format(params.DISCOUNT).replace(".", "p")

    try:
        os.stat(exp_dir)
    except OSError:
        os.makedirs(exp_dir)

    logger = logging.getLogger("DeepLogger")
    logger.setLevel(logging.INFO)

    # Logging filehandler
    #fh = logging.FileHandler(exp_dir + "/log.log")
    # Rotate file when filesize is 5 mb
    fh = RotatingFileHandler(exp_dir + "/log.log",
                             maxBytes=5000000,
                             backupCount=100)

    fh.setLevel(logging.INFO)

    # Console filehandler
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)

    formatter = logging.Formatter('%(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)

    logger.addHandler(fh)

    # Prevent nohup from producing large log file, logging to file is handled internally
    # logger.addHandler(ch)

    log_params(logger, params)

    #logging.basicConfig(level=logging.INFO, filename=exp_dir + "/log.log")

    if params.DETERMINISTIC:
        rng = np.random.RandomState(12345)
    else:
        rng = np.random.RandomState()

    if params.CUDNN_DETERMINISTIC:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    # Init ale
    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', 123)
    ale.setBool('display_screen', params.DISPLAY_SCREEN)
    ale.setFloat('repeat_action_probability', params.REPEAT_ACTION_PROBABILITY)
    full_rom_path = os.path.join(params.ROM_PATH, params.ROM_NAME)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())

    print "Legal actions: ", num_actions
    print ale.getMinimalActionSet()

    # Instantiate network
    logger.info("Setting up network...")
    network = None  # Be able to continue training from a network or watch a network play
    if (params.NETWORK_PICKLE_FILE is None):
        logger.info("Initializing a new random network...")
        network = q_network.DeepQLearner(
            params.RESIZED_WIDTH, params.RESIZED_HEIGHT, num_actions,
            params.PHI_LENGTH, params.DISCOUNT, params.LEARNING_RATE,
            params.RMS_DECAY, params.RMS_EPSILON, params.MOMENTUM,
            params.CLIP_DELTA, params.FREEZE_INTERVAL, params.BATCH_SIZE,
            params.NETWORK_TYPE, params.UPDATE_RULE, params.BATCH_ACCUMULATOR,
            rng)
    else:
        logger.info("Loading network instance from file...")
        handle = open(params.NETWORK_PICKLE_FILE, 'r')
        network = cPickle.load(handle)

    # Only used when getting a random network
    if params.RANDOM_NETWORK_PICKLE:
        import sys
        sys.setrecursionlimit(10000)
        result_net_file = open(params.EXPERIMENT_PREFIX + '.pkl', 'w')
        print "File opened"
        cPickle.dump(network, result_net_file, -1)
        print "Pickle dumped"
        result_net_file.close()
        sys.exit(0)

    # Instantiate agent
    logger.info("Setting up agent...")
    agent = ale_agent.NeuralAgent(network, params.EPSILON_START,
                                  params.EPSILON_MIN, params.EPSILON_DECAY,
                                  params.REPLAY_MEMORY_SIZE, exp_dir,
                                  params.REPLAY_START_SIZE,
                                  params.UPDATE_FREQUENCY, rng)

    # Instantiate experiment
    logger.info("Setting up experiment...")
    experiment = ale_experiment.ALEExperiment(
        ale, agent, params.RESIZED_WIDTH, params.RESIZED_HEIGHT,
        params.RESIZE_METHOD, params.EPOCHS, params.STEPS_PER_EPOCH,
        params.STEPS_PER_TEST, params.FRAME_SKIP, params.DEATH_ENDS_EPISODE,
        params.MAX_START_NULLOPS, rng)

    # Run experiment
    logger.info("Running experiment...")
    experiment.run()
Example #11
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    # dump parameters for replication
    time_str = time.strftime("%Y-%m-%d_%H-%M_", time.localtime())
    exp_dir = time_str + parameters.experiment_prefix
    exp_dir = os.path.join("results", exp_dir)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)
    parameter_file = open(os.path.join(exp_dir, 'parameter.txt'), 'w', 0)
    parameter_file.write(str(parameters))
    parameter_file.flush()
    parameter_file.close()

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    avail_actions = ale.getMinimalActionSet()
    if parameters.train_all:
        num_actions = len(ale.getLegalActionSet())
    else:
        num_actions = len(avail_actions)

    print "avail_actions: " + str(avail_actions)
    print "num_actions: " + str(num_actions)

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, avail_actions,
            num_actions, parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng, parameters.train_all)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng, exp_dir, parameters.train_all)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    experiment.run()
Example #12
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    # FOR VISUALIZATION
    USE_SDL = False
    if parameters.display_screen:
        if USE_SDL:
            import sys
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    agent = None

    if parameters.use_episodic_control:
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(
                parameters.knn, parameters.state_dimension,
                parameters.projection_type,
                defaults.RESIZED_WIDTH * defaults.RESIZED_HEIGHT,
                parameters.buffer_size, num_actions, rng)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

            # If this doesn't work, load using the function below
            # def try_to_load_as_pickled_object_or_None(filepath):
            #     """
            #     This is a defensive way to write pickle.load, allowing for very large files on all platforms
            #     """
            #     max_bytes = 2 ** 31 - 1
            #     try:
            #         input_size = os.path.getsize(filepath)
            #         bytes_in = bytearray(0)
            #         with open(filepath, 'rb') as f_in:
            #             for _ in range(0, input_size, max_bytes):
            #                 bytes_in += f_in.read(max_bytes)
            #         obj = cPickle.loads(bytes_in)
            #     except:
            #         return None
            #     return obj
            # qec_table = try_to_load_as_pickled_object_or_None(handle)

        agent = IBL_agent.EpisodicControl(qec_table, parameters.ec_discount,
                                          num_actions,
                                          parameters.epsilon_start,
                                          parameters.epsilon_min,
                                          parameters.epsilon_decay,
                                          parameters.experiment_prefix, rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    experiment.run()
Example #13
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    if parameters.nn_file is None:
        print 'building network...'
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        print 'loading network...'
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    print 'building agent...'
    if parameters.aws_secret_key and parameters.aws_access_key and parameters.s3_bucket:
        s3_utility = S3Utility(parameters.aws_access_key,
                               parameters.aws_secret_key, parameters.s3_bucket)
    else:
        s3_utility = None

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng, s3_utility)

    print 'building experiment...'
    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    print 'running experiment...'
    experiment.run()
Example #14
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ## Optionally record raw game screens to a directory
    if isinstance(parameters.record_screen_dir, str) and parameters.record_screen_dir:
        ale.setString('record_screen_dir', parameters.record_screen_dir)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        with open(parameters.nn_file, 'r') as handle:
            network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng)

    ## Get the coach: let it have read/write access to the agent's databanks
    coach = None
    if parameters.nn_coach_file is not None:
        with open(parameters.nn_coach_file, 'r') as handle:
            network = cPickle.load(handle)
        coach = ale_coach.NeuralCoach(network, agent.get_training_dataset(),
                                      parameters.coach_epsilon, rng)

    experiment = ale_experiment.ALEExperiment(ale,
                                              agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng,
                                              coach=coach)

    experiment.run()
Example #15
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    # Load VAE file
    with open(parameters.vae_aux_file, 'r') as f:
        aux_data = pickle.load(f)
    params = aux_data['params']

    with tf.device(None):  # e.g. "/gpu:0"

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

        VAE = vae.vae(params)
        VAE._create_network_()

        try:
            sess.run(tf.global_variables_initializer())
        except AttributeError:
            sess.run(tf.initialize_all_variables())

        saver = tf.train.Saver()

    import theano
    import ale_experiment
    import ale_agent
    import q_network
    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'
    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng, VAE, sess, defaults.VAE_REQ_STEPS,
        defaults.VAE_STORAGE_SIZE)
    time_str = time.strftime("%m-%d-%H-%M", time.gmtime())
    vae_save_path = '%s/%s_beta%f_z%d' % (defaults.VAE_OUT_PREFIX,
                                          rom.split('.')[0], params['beta'],
                                          params['z_size'])
    os.system('mkdir -p %s' % (vae_save_path))
    experiment.run()
    ckpt_path = '%s/%s_%s' % (vae_save_path, rom.split('.')[0], time_str)
    print ckpt_path
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    saver.save(sess, '%s/checkpoint.ckpt' % (ckpt_path))
Example #16
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(parameters.Seed)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd_data = 'deterministic'
        theano.config.dnn.conv.algo_bwd_filter = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    agent = None

    if not parameters.close2:
        print 'transition length is ', parameters.transition_length, 'transition range is', parameters.transition_range
    if parameters.method == 'ot':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(
                defaults.RESIZED_WIDTH,
                defaults.RESIZED_HEIGHT,
                num_actions,
                parameters.phi_length,
                parameters.discount,
                parameters.learning_rate,
                parameters.rms_decay,
                parameters.rms_epsilon,
                parameters.momentum,
                parameters.clip_delta,
                parameters.freeze_interval,
                parameters.batch_size,
                parameters.network_type,
                parameters.update_rule,
                parameters.batch_accumulator,
                rng,
                double=parameters.double_dqn,
                transition_length=parameters.transition_length)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        agent = ale_agents.OptimalityTightening(
            network, parameters.epsilon_start, parameters.epsilon_min,
            parameters.epsilon_decay, parameters.replay_memory_size,
            parameters.experiment_prefix, parameters.update_frequency,
            parameters.replay_start_size, rng, parameters.transition_length,
            parameters.transition_range, parameters.penalty_method,
            parameters.weight_min, parameters.weight_max,
            parameters.annealing_len, parameters.beta, parameters.two_train,
            parameters.late2, parameters.close2, parameters.verbose,
            parameters.double_dqn, parameters.save_pkl)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng, parameters.flickering_buffer_size)

    experiment.run()
Example #17
import ale_python_interface
import numpy as np

# Init ale
ale = ale_python_interface.ALEInterface()
# ale.setInt('random_seed', 123)  # seeds ALE's internal RNG; left disabled here

ale.setBool('display_screen', True)
ale.setFloat('repeat_action_probability', 0)

full_rom_path = "../roms/breakout.bin"
ale.loadROM(full_rom_path)

# testing ale
legal_actions = ale.getMinimalActionSet()
print(legal_actions)
total_reward = 0

repeat_action = 20
a_count = 0
index = 0

while not ale.game_over():

    if a_count > repeat_action:
        index = (index + 1) % len(legal_actions)
        a_count = 0

    a = legal_actions[index]
    a_count += 1

    print "Action: ", a
    total_reward += ale.act(a)

print "Total reward: ", total_reward
Example #18
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    # Load VAE file
    with open(parameters.vae_aux_file, 'r') as f:
        aux_data = pickle.load(f)
    params = aux_data['params']

    with tf.device(None):  # e.g. "/gpu:0"

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        # To force CPU-only execution instead:
        # config = tf.ConfigProto(device_count={'GPU': 0})

        sess = tf.Session(config=config)

        VAE = vae.vae(params)
        VAE._create_network_()

        try:
            sess.run(tf.global_variables_initializer())
        except AttributeError:
            sess.run(tf.initialize_all_variables())

        saver = tf.train.Saver()
        chkpt = tf.train.get_checkpoint_state(parameters.vae_file)

        if chkpt and chkpt.model_checkpoint_path:
            saver.restore(sess, chkpt.model_checkpoint_path)
        else:
            print 'No checkpoint found'

    import theano
    import ale_experiment
    import ale_agent
    import q_network
    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'
    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            VAE.z_size, 1, num_actions, parameters.phi_length,
            parameters.discount, parameters.learning_rate,
            parameters.rms_decay, parameters.rms_epsilon, parameters.momentum,
            parameters.clip_delta, parameters.freeze_interval,
            parameters.batch_size, parameters.network_type,
            parameters.update_rule, parameters.batch_accumulator, rng)
    else:
        handle = open(parameters.nn_file, 'r')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, VAE.X_size[1], VAE.X_size[0], parameters.resize_method,
        parameters.epochs, parameters.steps_per_epoch,
        parameters.steps_per_test, parameters.frame_skip,
        parameters.death_ends_episode, parameters.max_start_nullops, rng, VAE,
        sess)

    experiment.run()
Example #19
def main():
    ale = ale_python_interface.ALEInterface()
    ale.loadROM('aleroms/pong.bin')
    actions = ale.getMinimalActionSet()

    params = shared.bindNew()

    model = make_model(len(actions))

    x = T.tensor3()
    r = T.scalar()
    a = T.iscalar()
    lr = theano.shared(numpy.float32(0.00025))

    out = model(x.dimshuffle('x', 0, 1, 2))
    v = out[0, -1]
    pol = T.nnet.softmax(out[:, :-1])[0]

    A = r - v

    logpi = T.log(pol[a])
    actor_loss = logpi * theano.gradient.disconnected_grad(A)
    critic_loss = A**2
    loss = T.mean(actor_loss + critic_loss)

    updates = rmsprop(0.999)(params, T.grad(loss, params), lr)

    train = theano.function([x, a, r], [critic_loss, actor_loss],
                            updates=updates)
    get_pol = theano.function([x], pol)
    get_v = theano.function([x], v)

    print 'compiled functions'

    def getImg():
        x = ale.getScreenGrayscale()
        return numpy.float32(scipy.misc.imresize(x[:, :, 0], (84, 84)) / 255.)

    ale.reset_game()
    x = [getImg()] * 4
    t_max = 5
    gamma = 0.99
    loss = 0
    t = 0
    tot_r = 0
    rs = []
    pp.ion()
    pp.show()
    for i in range(100000):
        traj = []
        for j in range(t_max):
            t += 1
            x = x[1:] + [getImg()]
            pol = get_pol(x)
            a = numpy.int32(numpy.argmax(numpy.random.multinomial(1, pol)))
            r = 0
            for _ in range(4):
                r += numpy.float32(ale.act(actions[a]))
            tot_r += r
            traj.append([x, a, r])
            if ale.game_over(): break
        R = 0 if ale.game_over() else get_v(traj[-1][0])
        for x, a, r in traj[:-1][::-1]:
            R = r + gamma * R
            loss += train(x, a, numpy.float32(R))[0]
        if ale.game_over():
            rs.append(tot_r)
            print i, t, loss, tot_r
            print pol
            ale.reset_game()
            x = [getImg()] * 4
            loss = 0
            t = 0
            tot_r = 0
            pp.clf()
            if len(rs) < 200:
                pp.plot(rs)
            else:
                try:
                    plotmeans(numpy.float32(rs))
                except Exception, e:
                    print e
            #pp.show(block=False)
            pp.draw()
            pp.pause(0.001)
Example #20
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False) # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    agent = None

    if parameters.method == 'ec_dqn':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_ec=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

        agent = ale_agents.EC_DQN(network,
                                  qec_table,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  parameters.ec_discount,
                                  num_actions,
                                  parameters.ec_testing,
                                  rng)

    if parameters.method == 'dqn_episodic_memory1':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_episodic_mem=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

        agent = ale_agents.NeuralNetworkEpisodicMemory1(network,
                                                        qec_table,
                                                        parameters.epsilon_start,
                                                        parameters.epsilon_min,
                                                        parameters.epsilon_decay,
                                                        parameters.replay_memory_size,
                                                        parameters.experiment_prefix,
                                                        parameters.replay_start_size,
                                                        parameters.update_frequency,
                                                        parameters.ec_discount,
                                                        num_actions,
                                                        parameters.ec_testing,
                                                        rng)
    if parameters.method == 'dqn_episodic_memory2':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_episodic_mem=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

    if parameters.method == 'dqn_episodic_memory3':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, use_episodic_mem=True, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        if parameters.qec_table is None:
            qec_table = EC_functions.LshHash(parameters.state_dimension,
                                             defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                             parameters.buffer_size,
                                             rng)
        else:
            handle = open(parameters.qec_table, 'r')
            qec_table = cPickle.load(handle)

        agent = ale_agents.NeuralNetworkEpisodicMemory3(network,
                                                        qec_table,
                                                        parameters.epsilon_start,
                                                        parameters.epsilon_min,
                                                        parameters.epsilon_decay,
                                                        parameters.replay_memory_size,
                                                        parameters.experiment_prefix,
                                                        parameters.replay_start_size,
                                                        parameters.update_frequency,
                                                        parameters.ec_discount,
                                                        num_actions,
                                                        parameters.ec_testing,
                                                        rng)

    if parameters.method == 'dqn':
        if parameters.nn_file is None:
            network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                             defaults.RESIZED_HEIGHT,
                                             num_actions,
                                             parameters.phi_length,
                                             parameters.discount,
                                             parameters.learning_rate,
                                             parameters.rms_decay,
                                             parameters.rms_epsilon,
                                             parameters.momentum,
                                             parameters.clip_delta,
                                             parameters.freeze_interval,
                                             parameters.batch_size,
                                             parameters.network_type,
                                             parameters.update_rule,
                                             parameters.batch_accumulator,
                                             rng, double=parameters.double_dqn)
        else:
            handle = open(parameters.nn_file, 'r')
            network = cPickle.load(handle)

        agent = ale_agents.NeuralAgent(network,
                                       parameters.epsilon_start,
                                       parameters.epsilon_min,
                                       parameters.epsilon_decay,
                                       parameters.replay_memory_size,
                                       parameters.experiment_prefix,
                                       parameters.replay_start_size,
                                       parameters.update_frequency,
                                       rng)

    if parameters.method == 'episodic_control':
        if parameters.qec_table is None:
            qec_table = EC_functions.QECTable(parameters.knn,
                                              parameters.state_dimension,
                                              parameters.projection_type,
                                              defaults.RESIZED_WIDTH*defaults.RESIZED_HEIGHT,
                                              parameters.buffer_size,
                                              num_actions,
                                              rng,
                                              parameters.rebuild_knn_frequency)
        else:
            handle = open(parameters.qec_table, 'rb')
            qec_table = cPickle.load(handle)

        agent = ale_agents.EpisodicControl(qec_table,
                                           parameters.ec_discount,
                                           num_actions,
                                           parameters.epsilon_start,
                                           parameters.epsilon_min,
                                           parameters.epsilon_decay,
                                           parameters.experiment_prefix,
                                           parameters.ec_testing,
                                           rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng)

    experiment.run()
Exemple #21
0
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    time_str = time.strftime("_%m-%d-%H-%M_", time.localtime())
    logging.basicConfig(filename='log' + time_str + '.txt', level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())

    ######################################################
    # Daniel: This is where I insert human-guided stuff. #
    ######################################################

    # Logic to deal with loading a separate network trained on human data.
    # Must also address mapping from human net (0,1,2,...) to ALE.
    # I know that, for Breakout, my {0,1,2} correspond to {NOOP,LEFT,RIGHT}.
    # But how should these get mapped to ALE actions? I know 0=noop, 1=fire.
    # Keep in mind that there's a SECOND mapping that happens after this!
    map_action_index = None
    human_net = None
    human_experience_replay = None

    if parameters.use_human_net:
        if (rom == 'breakout' or rom == 'breakout.bin'):
            # This maps the action indices from the net (0,1,2,...) into a
            # **second** mapping [0 1 3 4], which is game-independent, so the
            # main work is to set map_action_index.
            # Thus, 0 ==> 0 ==> 0 (NOOP)
            # Thus, 1 ==> 3 ==> 4 (LEFT)
            # Thus, 2 ==> 2 ==> 3 (RIGHT)
            # (The net doesn't use FIRE.)
            map_action_index = {0: 0, 1: 3, 2: 2}
        elif (rom == 'space_invaders' or rom == 'space_invaders.bin'):
            # Second mapping is [0 1 3 4 11 12]. E.g., 4 is FLEFT in my data,
            # so it needs to be mapped to index 5, which results in '12'.
            map_action_index = {0: 0, 1: 1, 2: 3, 3: 2, 4: 5, 5: 4}
        else:
            raise ValueError("rom={} doesn't have action mapping".format(rom))
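
        # A minimal sketch of how the two mappings compose for Breakout,
        # assuming ale.getMinimalActionSet() returns [0, 1, 3, 4] here (as the
        # comments above suggest); the names below are only for illustration:
        #   minimal_actions = ale.getMinimalActionSet()   # [0, 1, 3, 4]
        #   net_index = 1                                  # LEFT in the human data
        #   second_index = map_action_index[net_index]     # 1 -> 3
        #   ale_action = minimal_actions[second_index]     # 3 -> 4 (LEFT in ALE)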

        # Let's make the human net; #actions = len(map_action_index).
        human_net = human_q_net.HumanQNetwork(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
            len(map_action_index), parameters.phi_length,
            parameters.batch_size, parameters.network_type,
            parameters.human_net_path, map_action_index)

    if parameters.use_human_experience_replay:
        if (rom == 'breakout' or rom == 'breakout.bin'):
            human_experience_replay = np.load(
                parameters.human_experience_replay_path)
        else:
            raise ValueError("rom={} doesn't have xp replay".format(rom))

    ###########################
    # Daniel: Back to normal. #
    ###########################

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        handle = open(parameters.nn_file, 'rb')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng, parameters.epochs,
        parameters.use_human_net, parameters.use_human_experience_replay,
        human_net, human_experience_replay)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng)

    experiment.run()
Exemple #22
0
def thread_learner_(thread_idx, global_params, t_grads, t_counters, running,
                    rs):
    numpy.random.seed(thread_idx)
    thread_counters = mpasnp(t_counters)
    thread_grads = map(mpasnp, t_grads)

    ale = ale_python_interface.ALEInterface()
    ale.loadROM('aleroms/breakout.bin')
    #ale.setInt('frame_skip', 4)
    ale.setFloat('repeat_action_probability', 0)
    ale.setBool('color_averaging', False)
    actions = ale.getMinimalActionSet()

    params = shared.bindNew()

    model = make_model(len(actions))
    train, get_pol, get_v, get_grads, update_from_grads, lr, beta = make_funcs(
        model, params)

    for gp, p in zip(global_params, params):
        p.set_value(mpasnp(gp), borrow=True)

    print 'compiled functions'

    def getImg():
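        # Grab the current ALE screen in grayscale, downsample to 84x84,
        # and scale pixel values to [0, 1].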
        x = ale.getScreenGrayscale()
        return numpy.float32(scipy.misc.imresize(x[:, :, 0], (84, 84)) / 255.)

    ale.reset_game()
    x = [getImg()] * 4
    t_max = 5
    gamma = 0.99
    loss = 0
    t = 0
    tot_r = 0
    #rs = []
    #pp.ion()
    #pp.show()
    t1000 = time.time()
    frame_0 = ale.getEpisodeFrameNumber()
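    # Main worker loop: collect up to t_max transitions, then turn the partial
    # trajectory into n-step actor-critic gradients for the shared parameters.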
    for i in range(1000000):
        if running[3][0] < 1:
            break
        traj = []
        for j in range(t_max):
            t += 1
            x = x[1:] + [getImg()]
            pol = get_pol(x)
            a = numpy.int32(numpy.argmax(numpy.random.multinomial(1, pol)))
            r = 0
            for _ in range(4):
                r += numpy.float32(ale.act(actions[a]))
            tot_r += r
            traj.append([x, a, r])
            if ale.game_over(): break

        R = 0 if ale.game_over() else get_v(traj[-1][0])
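        # R is bootstrapped from the value of the last observed state (0 if the
        # episode ended); fold it backwards through the earlier transitions as
        # R <- r + gamma * R and compute per-step gradients.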
        for x, a, r in traj[:-1][::-1]:
            R = r + gamma * R
            # loss += train(x,a,numpy.float32(R))[0]
            gs = get_grads(x, a, numpy.float32(R))
            #print 'pushing grads', thread_idx, thread_counters
            t0 = time.time()
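            # Accumulate this step's gradients into the shared buffers; the
            # first shared array's lock serializes updates across workers.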
            with t_grads[0][0].get_lock():
                #print '   ',time.time()-t0,thread_idx
                for g, tg in zip(gs, thread_grads):
                    tg += g
                thread_counters[thread_idx] += 1

        if ale.game_over():
            t1001 = time.time()

            print 'FPS:', ale.getEpisodeFrameNumber() / (t1001 - t1000)
            print '    ', ale.getEpisodeFrameNumber()
            print '    ', thread_idx
            t1000 = time.time()
            beta.set_value(numpy.float32(beta.get_value() * 0.99))
            rs.append(tot_r)
            print i, t, loss, tot_r
            print pol, beta.get_value()
            ale.reset_game()
            x = [getImg()] * 4
            loss = 0
            t = 0
            tot_r = 0
            if 0:
                pp.clf()
                if len(rs) < 200:
                    pp.plot(rs)
                else:
                    plotmeans(numpy.float32(rs))
                #pp.show(block=False)
                pp.draw()
                pp.pause(0.001)
Exemple #23
0
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    try:
        mode = int(parameters.mode)
    except ValueError:
        mode = 1

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    if parameters.experiment_directory:
        experiment_directory = parameters.experiment_directory
    else:
        time_str = time.strftime("_%Y-%m-%d-%H-%M")
        experiment_directory = parameters.experiment_prefix + time_str \
                                   + '_mode_' + str(mode)

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    if parameters.record_video:
        video_directory = os.path.join(experiment_directory, 'video')
        if not os.path.isdir(video_directory):
            os.makedirs(video_directory)

        ale.setString('record_screen_dir', video_directory)

        if sys.platform != 'darwin':
            ale.setBool('sound', True)
            ale.setString("record_sound_filename",
                          os.path.join(video_directory, "sound.wav"))
            # "We set fragsize to 64 to ensure proper sound sync"
            # (that's what videoRecordingExample.cpp in ALE says. I don't really know what it means)
            ale.setInt("fragsize", 64)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    ale.setMode(mode)

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.use_double,
            parameters.batch_size, parameters.network_type,
            parameters.update_rule, parameters.batch_accumulator, rng)
    else:
        handle = open(parameters.nn_file, 'rb')
        network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  experiment_directory,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng,
                                  recording=parameters.recording)

    experiment = ale_experiment.ALEExperiment(
        ale,
        agent,
        defaults.RESIZED_WIDTH,
        defaults.RESIZED_HEIGHT,
        parameters.resize_method,
        parameters.epochs,
        parameters.steps_per_epoch,
        parameters.steps_per_test,
        parameters.frame_skip,
        parameters.death_ends_episode,
        parameters.max_start_nullops,
        rng,
        length_in_episodes=parameters.episodes)

    experiment.run()