Example #1
    def reset(self):
        # The game cannot be ended at a fixed step limit; an episode only
        # ends when the player dies, after which a new round starts.
        #        print('Please reset the game!!!')
        # get state
        self.pull_screenshot('autojump.jpg')
        obs = state('autojump.jpg')
        return obs
    def step_robot(self, action, arm):
        # do action
        press_time = self.action_to_presstime(action)
        print(press_time)
        # press_time is in milliseconds; arm.jump expects seconds
        arm.jump(press_time / 1000)
        #        print('action:',action)
        #        print('press_time',press_time)
        time.sleep(4.5)

        # get state
        self.pull_screenshot('autojump.jpg')
        obs = state('autojump.jpg')

        # Game Over
        if self.restart('autojump.jpg'):
            done = 1
            reward = -1
            score = None
        else:
            done = 0
            reward = 1
            score = self.get_score('autojump.jpg')

        return obs, reward, done, score
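
The reset/step_robot pair above follows the usual Gym-style episode loop. A minimal driver sketch, assuming a hypothetical JumpRobotEnv wrapper exposing these methods, a RobotArm object providing the jump() call used above, and some policy() function (all three names are illustrative, not from the original code):

# Sketch only: JumpRobotEnv, RobotArm and policy are assumed names.
env = JumpRobotEnv()
arm = RobotArm()

obs = env.reset()
done = 0
while not done:
    action = policy(obs)  # any policy producing an action in the env's range
    obs, reward, done, score = env.step_robot(action, arm)
    print('reward:', reward, 'score:', score)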
Example #3
def gen_prefix(code, data, parent=None):
    if parent:
        prefix = (parent << 8) | code
    else:
        prefix = code
    fname = 'prefix_%2x' % prefix
    print 'prefix', '%02X' % prefix, '...',
    table = '%2x' % prefix
    ret = [
        '# ' + '='*70,
        '# %2X prefix start' % prefix,
        '# ' + '='*70,
        '',
        'def %s(z80):' % fname,
    ]
    ret += [IDENT + x for x in read_op()]
    # if code in (0xDD, 0xFD, 0xED):
    ret.append('%s%s' % (IDENT, (ICOUNT % 4)))
    ret.append('%s%s' % (IDENT, (ITOTAL % 4)))
    ret += [
        '%(i)s%(r)s += 1' % {'i': IDENT, 'r': state('_r')},
        '%(i)s%(jp)s[tmp8](z80)' % {'i': IDENT, 'jp': gen_jp(table), 'op': read_op()},
        '%s[0x%02X] = %s' % (gen_jp('%2x' % parent if parent else None), code, fname),
        '# ' + '-'*70,
        ''
    ]
    for code, op in data.items():
        if not isinstance(op, dict): continue # skip extra data
        if op['multi_mn']: continue # not implemented for now
        # for prefixed ops: the minimum time used to fetch the prefix (4 cycles) must be subtracted
        if isinstance(op['t'], list):
            op['t'] = [t - 4 for t in op['t']]
        else:
            op['t'] -= 4
        if op['asm'].startswith('shift'):
            gen = gen_prefix(code, op, parent=prefix)
        else:
            gen = gen_one(code, op, table='%2x' % prefix)
        if gen:
            ret += gen
            ret.append('')
    ret += [
        '# ' + '-'*70,
        '# %2X prefix end' % prefix,
        '# ' + '-'*70,
    ]
    print 'done.'
    return ret
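
gen_prefix recursively emits a dispatch function for one opcode prefix and then code for every op under it, descending into nested prefixes (those whose 'asm' starts with 'shift'). A hedged sketch of how it might be driven from a top-level opcode table; the 'opcodes' dict layout and the output file name are assumptions inferred from the fields accessed above:

# Sketch only: 'opcodes' is assumed to map opcode bytes to op dicts, where
# prefix entries (e.g. 0xCB, 0xDD, 0xED, 0xFD) have 'asm' starting with 'shift'
# and nest their own op dicts.
lines = []
for code, op in opcodes.items():
    if isinstance(op, dict) and op.get('asm', '').startswith('shift'):
        lines += gen_prefix(code, op)
with open('z80_prefixes.py', 'w') as f:
    f.write('\n'.join(lines))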
Example #4
    def step(self, action):
        #do action
        press_time = self.action_to_presstime(action)
        self.jump(press_time)
        #        print('action:',action)
        #        print('press_time',press_time)
        time.sleep(3.9)

        #get state
        self.pull_screenshot('autojump.jpg')
        obs = state('autojump.jpg')

        # Game Over
        if self.restart('autojump.jpg'):
            done = 1
            reward = -1
        else:
            done = 0
            reward = 1

        return obs, reward, done
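
Both step() variants rely on action_to_presstime() to turn the agent's continuous action into a screen-press duration. That method is not shown in these examples; a plausible sketch, assuming the action range of (-1., 1.) mentioned in the trainers below and an illustrative press-time window:

import numpy as np

def action_to_presstime(action, min_ms=200, max_ms=1100):
    # Hypothetical mapping: scale an action in [-1, 1] linearly into a press
    # duration in milliseconds; the bounds here are illustrative only.
    a = float(np.clip(action, -1.0, 1.0))
    return min_ms + (a + 1.0) / 2.0 * (max_ms - min_ms)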
Example #5
def train_jump_after_data(env,
                          episodes,
                          data,
                          experiment_dir,
                          actor,
                          critic,
                          memory,
                          actor_lr,
                          critic_lr,
                          batch_size,
                          gamma,
                          tau=0.01):
    # build agent: action_range=(-1., 1.),reward_scale=1.
    agent = DDPG(actor,
                 critic,
                 memory,
                 env.observation_shape,
                 env.action_shape,
                 actor_lr=actor_lr,
                 critic_lr=critic_lr,
                 batch_size=batch_size,
                 gamma=gamma,
                 tau=tau)
    # put data into memory
    print('Loading', len(data), 'transitions into memory...')
    for i in range(len(data)):
        obs = state(data.iat[i, 0])
        action = env.presstime_to_action(data.iat[i, 1])
        r = data.iat[i, 3]
        new_obs = state(data.iat[i, 2])
        done = data.iat[i, 4]
        agent.store_transition(obs, action, r, new_obs, done)

    # saver
    saver = tf.train.Saver()
    # ------add save dir--------
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # summary dir
    summary_dir = os.path.join(experiment_dir, "summaries")
    if not os.path.exists(summary_dir):  # create the directory if it does not exist
        os.makedirs(summary_dir)
    summary_writer = tf.summary.FileWriter(summary_dir)
    summary = tf.Summary()
    episode_summary = tf.Summary()
    # ----------------------------
    with tf.Session() as sess:

        # load model if we have
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint:
            print("Loading model checkpoint {}...\n".format(latest_checkpoint))
            saver.restore(sess, latest_checkpoint)
            agent.sess = sess
        else:
            # Prepare everything.
            print('Building new model...')
            agent.initialize(sess)
        #         sess.graph.finalize()

        # ------------------------
        print('Training...')

        for episode in range(episodes):
            # set game
            #             print('new game')
            obs0 = env.reset()
            episode_reward = 0
            episode_step = 0

            while 1:

                # Train.
                cl, al = agent.train()
                global_step = sess.run(agent.global_step)
                # record loss (use a fresh Summary each step so old values do
                # not accumulate in the reused proto)
                summary = tf.Summary()
                summary.value.add(simple_value=cl, tag="critic_loss")
                summary.value.add(simple_value=al, tag="actor_loss")
                summary_writer.add_summary(summary, global_step)

                #             #record graph
                #             summary_writer.add_graph(sess.graph)

                # flush
                summary_writer.flush()

                # update model
                agent.update_target_net()

                # -----------------------------------
                # get action
                feed_dict = {agent.obs0: [obs0]}
                action = sess.run(agent.actor_tf, feed_dict=feed_dict)
                action = action.flatten()

                # do action
                obs1, reward, done = env.step(action)
                episode_reward += reward
                episode_step += 1

                # store transition
                agent.store_transition(obs0, action, reward, obs1, done)
                obs0 = obs1

                if done:
                    # fresh Summary per episode, for the same reason as above
                    episode_summary = tf.Summary()
                    episode_summary.value.add(simple_value=episode_reward,
                                              tag="episode_reward")
                    episode_summary.value.add(simple_value=episode_step,
                                              tag="episode_step")
                    summary_writer.add_summary(episode_summary, episode)
                    summary_writer.flush()
                    #                     print('dead at',episode_step)
                    break

                # ----------------------------------------------------------

            # save model every 100 episodes
            if episode % 100 == 0:
                saver.save(tf.get_default_session(), checkpoint_path)

    print('Training completed!')
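
train_jump_after_data reads recorded transitions with data.iat[i, col], so data is expected to be a pandas DataFrame whose columns are, in order: a screenshot path for the observation, the press time, a screenshot path for the next observation, the reward, and the done flag (column order inferred from the .iat indices above). A hedged loading sketch; the file name and column names are assumptions for illustration:

import pandas as pd

# Column order inferred from the .iat indices used in the trainer above.
data = pd.read_csv('transitions.csv',
                   names=['obs_img', 'press_time', 'next_obs_img', 'reward', 'done'])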
Example #6
def train_on_data_online(env,
                         steps,
                         data,
                         experiment_dir,
                         actor,
                         critic,
                         memory,
                         actor_lr,
                         critic_lr,
                         batch_size,
                         gamma,
                         tau=0.01):

    #build agent: action_range=(-1., 1.),reward_scale=1.
    agent = DDPG(actor,
                 critic,
                 memory,
                 env.observation_shape,
                 env.action_shape,
                 actor_lr=actor_lr,
                 critic_lr=critic_lr,
                 batch_size=batch_size,
                 gamma=gamma,
                 tau=tau)
    #put data into memory
    init_data = 1000
    print('Loading', init_data, 'transitions into memory...')
    assert len(data) >= init_data
    for i in range(init_data):
        obs = state(data.iat[i, 0])
        action = env.presstime_to_action(data.iat[i, 1])
        r = data.iat[i, 3]
        new_obs = state(data.iat[i, 2])
        done = data.iat[i, 4]
        agent.store_transition(obs, action, r, new_obs, done)

    #saver
    saver = tf.train.Saver()
    #------add save dir--------
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    #summary dir
    summary_dir = os.path.join(experiment_dir, "summaries")
    if not os.path.exists(summary_dir):  # create the directory if it does not exist
        os.makedirs(summary_dir)
    summary_writer = tf.summary.FileWriter(summary_dir)
    summary = tf.Summary()
    #----------------------------
    with tf.Session() as sess:

        #load model if we have
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint:
            print("Loading model checkpoint {}...\n".format(latest_checkpoint))
            saver.restore(sess, latest_checkpoint)
            agent.sess = sess
        else:
            # Prepare everything.
            print('Building new model...')
            agent.initialize(sess)
        #         sess.graph.finalize()

        #------------------------
        print('Training...')
        data_inx = init_data

        for step in range(steps):
            #load 1 data if there are more data
            if data_inx < len(data):
                obs = state(data.iat[data_inx, 0])
                action = env.presstime_to_action(data.iat[data_inx, 1])
                r = data.iat[data_inx, 3]
                new_obs = state(data.iat[data_inx, 2])
                done = data.iat[data_inx, 4]
                agent.store_transition(obs, action, r, new_obs, done)
                data_inx += 1

            # Train.
            cl, al = agent.train()
            global_step = sess.run(agent.global_step)
            # record loss (fresh Summary each step so values do not accumulate)
            summary = tf.Summary()
            summary.value.add(simple_value=cl, tag="critic_loss")
            summary.value.add(simple_value=al, tag="actor_loss")
            summary_writer.add_summary(summary, global_step)

            #             #record graph
            #             summary_writer.add_graph(sess.graph)

            #flush
            summary_writer.flush()

            #update model
            agent.update_target_net()

            #save model every 1000 steps
            if step % 1000 == 0:
                saver.save(tf.get_default_session(), checkpoint_path)

    print('Training completed!')
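
A call sketch for this online variant, assuming actor, critic and memory objects compatible with the DDPG constructor used above (their construction is elided because it depends on that DDPG implementation), and purely illustrative hyperparameters:

# Sketch only: env and data as in the other trainers; all values illustrative.
train_on_data_online(env, steps=50000, data=data,
                     experiment_dir='./experiments/jump_online',
                     actor=actor, critic=critic, memory=memory,
                     actor_lr=1e-4, critic_lr=1e-3,
                     batch_size=64, gamma=0.99, tau=0.01)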
Example #7
def train_on_data(env,
                  steps,
                  data,
                  experiment_dir,
                  actor,
                  critic,
                  memory,
                  actor_lr,
                  critic_lr,
                  batch_size,
                  gamma,
                  tau=0.01):

    #build agent: action_range=(-1., 1.),reward_scale=1.
    agent = DDPG(actor,
                 critic,
                 memory,
                 env.observation_shape,
                 env.action_shape,
                 actor_lr=actor_lr,
                 critic_lr=critic_lr,
                 batch_size=batch_size,
                 gamma=gamma,
                 tau=tau)

    #put data into memory
    print('Loading memory...')
    for i in range(len(data)):
        obs = state(data.iat[i, 0])
        action = env.presstime_to_action(data.iat[i, 1])
        r = data.iat[i, 3]
        new_obs = state(data.iat[i, 2])
        done = data.iat[i, 4]
        agent.store_transition(obs, action, r, new_obs, done)

    #saver
    saver = tf.train.Saver()
    #------add save dir--------
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    #summary dir
    summary_dir = os.path.join(experiment_dir, "summaries")
    if not os.path.exists(summary_dir):  # create the directory if it does not exist
        os.makedirs(summary_dir)
    summary_writer = tf.summary.FileWriter(summary_dir)
    summary = tf.Summary()
    #----------------------------
    with tf.Session() as sess:

        # Prepare everything.
        agent.initialize(sess)
        #         sess.graph.finalize()

        #------------------------
        print('Training...')
        for step in range(steps):

            t0 = time.time()

            # Train.
            cl, al = agent.train()

            t1 = time.time()
            tt = t1 - t0

            # record loss and timing (fresh Summary each step so values do not accumulate)
            summary = tf.Summary()
            summary.value.add(simple_value=cl, tag="critic_loss")
            summary.value.add(simple_value=al, tag="actor_loss")
            summary.value.add(simple_value=tt, tag="train_time")
            summary_writer.add_summary(summary, step)

            #record graph
            summary_writer.add_graph(sess.graph)

            #flush
            summary_writer.flush()

            #update model
            agent.update_target_net()

            #save model every 100 steps
            if step % 100 == 0:
                saver.save(tf.get_default_session(), checkpoint_path)

    print('Training completed.')
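
All three trainers write scalar summaries under experiment_dir/summaries, so progress can be inspected by pointing TensorBoard at that directory, for example:

tensorboard --logdir ./experiments/jump_online/summaries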