def reset(self):
    # The game has no configurable max-step limit; an episode only ends when the
    # player dies naturally, then a new round starts.
    # print('Please reset the game!!!')
    # get state
    self.pull_screenshot('autojump.jpg')
    obs = state('autojump.jpg')
    return obs
def step_robot(self, action, arm):
    # do action
    press_time = self.action_to_presstime(action)
    print(press_time)
    arm.jump(press_time / 1000)  # arm.jump expects seconds; press_time is in ms
    # print('action:', action)
    # print('press_time:', press_time)
    time.sleep(4.5)
    # get state
    self.pull_screenshot('autojump.jpg')
    obs = state('autojump.jpg')
    # Game over?
    if self.restart('autojump.jpg'):
        done = 1
        reward = -1
        score = None
    else:
        done = 0
        reward = 1
        score = self.get_score('autojump.jpg')
    return obs, reward, done, score
def gen_prefix(code, data, parent=None):
    if parent:
        prefix = (parent << 8) | code
    else:
        prefix = code
    fname = 'prefix_%02x' % prefix
    print('prefix', '%02X' % prefix, '...', end=' ')
    table = '%02x' % prefix
    ret = [
        '# ' + '=' * 70,
        '# %02X prefix start' % prefix,
        '# ' + '=' * 70,
        '',
        'def %s(z80):' % fname,
    ]
    ret += [IDENT + x for x in read_op()]
    # if code in (0xDD, 0xFD, 0xED):
    ret.append('%s%s' % (IDENT, (ICOUNT % 4)))
    ret.append('%s%s' % (IDENT, (ITOTAL % 4)))
    ret += [
        '%(i)s%(r)s += 1' % {'i': IDENT, 'r': state('_r')},
        '%(i)s%(jp)s[tmp8](z80)' % {'i': IDENT, 'jp': gen_jp(table)},
        '%s[0x%02X] = %s' % (gen_jp('%02x' % parent if parent else None), code, fname),
        '# ' + '-' * 70,
        '',
    ]
    for sub_code, op in data.items():
        if not isinstance(op, dict):
            continue  # skip extra data
        if op['multi_mn']:
            continue  # not implemented for now
        # for prefixed ops: the minimum time used to fetch the prefix (4)
        # must be subtracted
        if isinstance(op['t'], list):
            op['t'] = [t - 4 for t in op['t']]
        else:
            op['t'] -= 4
        if op['asm'].startswith('shift'):
            gen = gen_prefix(sub_code, op, parent=prefix)
        else:
            gen = gen_one(sub_code, op, table='%02x' % prefix)
        if gen:
            ret += gen
            ret.append('')
    ret += [
        '# ' + '-' * 70,
        '# %02X prefix end' % prefix,
        '# ' + '-' * 70,
    ]
    print('done.')
    return ret
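# For orientation, a rough sketch of the kind of source text gen_prefix emits for
# a one-byte prefix such as 0xDD. Everything below is illustrative only: it
# assumes gen_jp('dd') yields a jump-table name like 'JP_DD', state('_r') yields
# an attribute path like 'z80.r', and read_op() / ICOUNT / ITOTAL expand to the
# fetch and timing lines shown; none of those helpers are defined in this section.
#
#   def prefix_dd(z80):
#       tmp8 = ...          # next opcode byte, from read_op()
#       ...                 # ICOUNT % 4 and ITOTAL % 4 timing updates
#       z80.r += 1
#       JP_DD[tmp8](z80)
#   JP[0xDD] = prefix_dd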
def step(self, action):
    # do action
    press_time = self.action_to_presstime(action)
    self.jump(press_time)
    # print('action:', action)
    # print('press_time:', press_time)
    time.sleep(3.9)
    # get state
    self.pull_screenshot('autojump.jpg')
    obs = state('autojump.jpg')
    # Game over?
    if self.restart('autojump.jpg'):
        done = 1
        reward = -1
    else:
        done = 0
        reward = 1
    return obs, reward, done
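# The two conversions used above and by the trainers below are not defined in
# this section. A minimal sketch, assuming a linear map between the DDPG action
# range (-1, 1) and a press time in milliseconds; the bounds 200/1100 ms are
# illustrative guesses, not values from the source:
def action_to_presstime(self, action, low=200., high=1100.):
    # scale an action in [-1, 1] to a press time in [low, high] ms
    return float(low + (action[0] + 1.) / 2. * (high - low))

def presstime_to_action(self, press_time, low=200., high=1100.):
    # inverse map: a press time in ms back to an action in [-1, 1]
    return [2. * (press_time - low) / (high - low) - 1.]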
def train_jump_after_data(env, episodes, data, experiment_dir, actor, critic, memory,
                          actor_lr, critic_lr, batch_size, gamma, tau=0.01):
    # build agent: action_range=(-1., 1.), reward_scale=1.
    agent = DDPG(actor, critic, memory, env.observation_shape, env.action_shape,
                 actor_lr=actor_lr, critic_lr=critic_lr, batch_size=batch_size,
                 gamma=gamma, tau=tau)

    # put data into memory
    print('Loading', len(data), 'transitions into memory...')
    for i in range(len(data)):
        obs = state(data.iat[i, 0])
        action = env.presstime_to_action(data.iat[i, 1])
        r = data.iat[i, 3]
        new_obs = state(data.iat[i, 2])
        done = data.iat[i, 4]
        agent.store_transition(obs, action, r, new_obs, done)

    # saver
    saver = tf.train.Saver()

    # ------ save dirs --------
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # summary dir
    summary_dir = os.path.join(experiment_dir, "summaries")
    if not os.path.exists(summary_dir):  # create the directory if it does not exist
        os.makedirs(summary_dir)
    summary_writer = tf.summary.FileWriter(summary_dir)
    summary = tf.Summary()
    episode_summary = tf.Summary()
    # ----------------------------

    with tf.Session() as sess:
        # load the model if a checkpoint exists
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint:
            print("Loading model checkpoint {}...\n".format(latest_checkpoint))
            saver.restore(sess, latest_checkpoint)
            agent.sess = sess
        else:
            # Prepare everything.
            print('Building new model...')
            agent.initialize(sess)
        # sess.graph.finalize()
        # ------------------------

        print('Training...')
        for episode in range(episodes):
            # start a new game
            obs0 = env.reset()
            episode_reward = 0
            episode_step = 0
            while True:
                # Train.
                cl, al = agent.train()
                global_step = sess.run(agent.global_step)
                # record losses
                summary.value.add(simple_value=cl, tag="critic_loss")
                summary.value.add(simple_value=al, tag="actor_loss")
                summary_writer.add_summary(summary, global_step)
                # # record graph
                # summary_writer.add_graph(sess.graph)
                summary_writer.flush()
                # update target networks
                agent.update_target_net()
                # -----------------------------------
                # get action
                feed_dict = {agent.obs0: [obs0]}
                action = sess.run(agent.actor_tf, feed_dict=feed_dict)
                action = action.flatten()
                # do action
                obs1, reward, done = env.step(action)
                episode_reward += reward
                episode_step += 1
                # store transition
                agent.store_transition(obs0, action, reward, obs1, done)
                obs0 = obs1
                if done:
                    episode_summary.value.add(simple_value=episode_reward, tag="episode_reward")
                    episode_summary.value.add(simple_value=episode_step, tag="episode_step")
                    summary_writer.add_summary(episode_summary, episode)
                    summary_writer.flush()
                    # print('dead at', episode_step)
                    break
            # ----------------------------------------------------------
            # save the model every 100 episodes
            if episode % 100 == 0:
                saver.save(sess, checkpoint_path)
    print('Training completed!')
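# The trainers index `data` positionally with data.iat[i, 0..4], so it is
# presumably a pandas DataFrame with one transition per row. A minimal sketch of
# loading such a file; the CSV name and column names here are assumptions
# inferred from the iat indices, not confirmed by the source:
import pandas as pd

def load_transitions(csv_path='transitions.csv'):
    # columns: obs image path, press time (ms), next obs image path, reward, done
    return pd.read_csv(csv_path, header=None,
                       names=['obs', 'press_time', 'new_obs', 'reward', 'done'])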
def train_on_data_online(env, steps, data, experiment_dir, actor, critic, memory,
                         actor_lr, critic_lr, batch_size, gamma, tau=0.01):
    # build agent: action_range=(-1., 1.), reward_scale=1.
    agent = DDPG(actor, critic, memory, env.observation_shape, env.action_shape,
                 actor_lr=actor_lr, critic_lr=critic_lr, batch_size=batch_size,
                 gamma=gamma, tau=tau)

    # put the first init_data transitions into memory
    init_data = 1000
    print('Loading', init_data, 'transitions into memory...')
    assert len(data) >= init_data
    for i in range(init_data):
        obs = state(data.iat[i, 0])
        action = env.presstime_to_action(data.iat[i, 1])
        r = data.iat[i, 3]
        new_obs = state(data.iat[i, 2])
        done = data.iat[i, 4]
        agent.store_transition(obs, action, r, new_obs, done)

    # saver
    saver = tf.train.Saver()

    # ------ save dirs --------
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # summary dir
    summary_dir = os.path.join(experiment_dir, "summaries")
    if not os.path.exists(summary_dir):  # create the directory if it does not exist
        os.makedirs(summary_dir)
    summary_writer = tf.summary.FileWriter(summary_dir)
    summary = tf.Summary()
    # ----------------------------

    with tf.Session() as sess:
        # load the model if a checkpoint exists
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_checkpoint:
            print("Loading model checkpoint {}...\n".format(latest_checkpoint))
            saver.restore(sess, latest_checkpoint)
            agent.sess = sess
        else:
            # Prepare everything.
            print('Building new model...')
            agent.initialize(sess)
        # sess.graph.finalize()
        # ------------------------

        print('Training...')
        data_inx = init_data
        for step in range(steps):
            # feed one more recorded transition per step while any remain
            if data_inx < len(data):
                obs = state(data.iat[data_inx, 0])
                action = env.presstime_to_action(data.iat[data_inx, 1])
                r = data.iat[data_inx, 3]
                new_obs = state(data.iat[data_inx, 2])
                done = data.iat[data_inx, 4]
                agent.store_transition(obs, action, r, new_obs, done)
                data_inx += 1

            # Train.
            cl, al = agent.train()
            global_step = sess.run(agent.global_step)
            # record losses
            summary.value.add(simple_value=cl, tag="critic_loss")
            summary.value.add(simple_value=al, tag="actor_loss")
            summary_writer.add_summary(summary, global_step)
            # # record graph
            # summary_writer.add_graph(sess.graph)
            summary_writer.flush()
            # update target networks
            agent.update_target_net()

            # save the model every 1000 steps
            if step % 1000 == 0:
                saver.save(sess, checkpoint_path)
    print('Training completed!')
def train_on_data(env, steps, data, experiment_dir, actor, critic, memory,
                  actor_lr, critic_lr, batch_size, gamma, tau=0.01):
    # build agent: action_range=(-1., 1.), reward_scale=1.
    agent = DDPG(actor, critic, memory, env.observation_shape, env.action_shape,
                 actor_lr=actor_lr, critic_lr=critic_lr, batch_size=batch_size,
                 gamma=gamma, tau=tau)

    # put data into memory
    print('Loading memory...')
    for i in range(len(data)):
        obs = state(data.iat[i, 0])
        action = env.presstime_to_action(data.iat[i, 1])
        r = data.iat[i, 3]
        new_obs = state(data.iat[i, 2])
        done = data.iat[i, 4]
        agent.store_transition(obs, action, r, new_obs, done)

    # saver
    saver = tf.train.Saver()

    # ------ save dirs --------
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    # summary dir
    summary_dir = os.path.join(experiment_dir, "summaries")
    if not os.path.exists(summary_dir):  # create the directory if it does not exist
        os.makedirs(summary_dir)
    summary_writer = tf.summary.FileWriter(summary_dir)
    summary = tf.Summary()
    # ----------------------------

    with tf.Session() as sess:
        # Prepare everything.
        agent.initialize(sess)
        # sess.graph.finalize()
        # ------------------------

        # record the graph once; re-adding it on every step would bloat the event file
        summary_writer.add_graph(sess.graph)

        print('Training...')
        for step in range(steps):
            t0 = time.time()
            # Train.
            cl, al = agent.train()
            t1 = time.time()
            tt = t1 - t0
            # record losses and wall-clock time per training step
            summary.value.add(simple_value=cl, tag="critic_loss")
            summary.value.add(simple_value=al, tag="actor_loss")
            summary.value.add(simple_value=tt, tag="train_time")
            summary_writer.add_summary(summary, step)
            summary_writer.flush()
            # update target networks
            agent.update_target_net()

            # save the model every 100 steps
            if step % 100 == 0:
                saver.save(sess, checkpoint_path)
    print('Training completed.')
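# A hedged sketch of how the trainers above might be wired up end to end. The
# Env, Actor, Critic, and Memory constructors and their arguments are
# assumptions modelled on a baselines-style DDPG interface, not definitions
# from this repository; adjust them to the actual classes. Run only one trainer
# per process, since each builds its own TF graph and Saver.
if __name__ == '__main__':
    env = Env()                                    # hypothetical jump-game env
    actor = Actor(nb_actions=1)                    # hypothetical policy network
    critic = Critic()                              # hypothetical Q network
    memory = Memory(limit=100000,                  # hypothetical replay buffer
                    action_shape=env.action_shape,
                    observation_shape=env.observation_shape)
    data = load_transitions('transitions.csv')     # see the loading sketch above

    # offline pre-training on recorded transitions; swap in train_on_data_online
    # or train_jump_after_data for the streaming / live-play variants
    train_on_data(env, steps=10000, data=data, experiment_dir='./experiment',
                  actor=actor, critic=critic, memory=memory,
                  actor_lr=1e-4, critic_lr=1e-3, batch_size=64, gamma=0.99)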