def init_random_exp_memory(self, size):
    """Fill the experience memory with `size` transitions from a uniformly
    random policy.

    The request is clamped to the memory capacity. A sentinel record
    (action/reward/terminal all zero) is stored for the start state of the
    first episode and again after every game reset, so episode boundaries
    are visible in the replay buffer.
    """
    size = min(size, self.memory_size)
    game = self.get_game()
    # sentinel entry for the initial state of the first episode
    self.exp_memory.add(game.get_state(), 0, 0, 0)
    for _ in range(size):
        chosen = np.random.randint(0, self.num_actions)
        reward, done = game.execute_action(chosen)
        self.exp_memory.add(game.get_state(), chosen, reward, done)
        if done:
            game.reset()
            # sentinel entry for the fresh episode's start state
            self.exp_memory.add(game.get_state(), 0, 0, 0)
def play_game(game, fps=30):
    """
    Asynchronously apply real-time game updates and broadcast state to all
    clients currently active in the game. Note that this loop must be
    initiated by a parallel thread for each active game

    game (Game object): Stores relevant game state. Note that the game id is
        the same as to socketio room id for all clients connected to this game
    fps (int): Number of game ticks that should happen every second
    """
    status = Game.Status.ACTIVE
    # Tick until the game reports a terminal status. Each tick runs under
    # the game lock; the socketio emits happen outside the lock.
    while status != Game.Status.DONE and status != Game.Status.INACTIVE:
        with game.lock:
            status = game.tick()
        if status == Game.Status.RESET:
            with game.lock:
                data = game.get_data()
            socketio.emit('reset_game', {"state": game.to_json(), "timeout": game.reset_timeout, "data": data}, room=game.id)
            # reset_timeout is in milliseconds; socketio.sleep yields so
            # other handlers can run during the pause
            socketio.sleep(game.reset_timeout / 1000)
        else:
            socketio.emit('state_pong', {"state": game.get_state()}, room=game.id)
        # pace the loop to the requested tick rate
        socketio.sleep(1 / fps)
    # Loop exited: broadcast the final status and data, then tear down.
    with game.lock:
        data = game.get_data()
    socketio.emit('end_game', {"status": status, "data": data}, room=game.id)
    # INACTIVE games are assumed to have been deactivated elsewhere already
    if status != Game.Status.INACTIVE:
        game.deactivate()
    cleanup_game(game)
def myproducts():
    """Render the per-tenant product page for a logged-in user.

    The tenant prefix is taken from the request's subdomain, falling back
    to the "asdf" tenant when served from localhost:5000. Falls through to
    the base page when no login cookie is present.
    """
    # .get() instead of ['login']: a missing cookie previously raised
    # KeyError (HTTP 500) for anonymous visitors instead of rendering
    # the base page.
    if request.cookies.get('login'):
        prefix = request.url_root.split('/')[2].split('.')[0]
        if prefix == "localhost:5000":
            prefix = "asdf"
        # strip the trailing ":<prefix>" from the stored key id
        keyid = get_val('corp.cf.zone', ":%s" % prefix, second)[:-(len(prefix) + 1)]
        # NOTE(review): json.loads('') raises ValueError when no user data
        # exists for this key — presumably unreachable once logged in; confirm.
        rec = json.loads(get_state(keyid, 'corp-userdata') or '')
        return render_template('myproducts.html', serno=rec['serno'])
    return render_template('base.html')
def init_exp_memory(self, size):
    """Seed the experience memory with `size` transitions collected under
    the current epsilon-greedy policy.

    The request is clamped to the memory capacity. A zero sentinel record
    marks the start state of each episode, mirroring
    init_random_exp_memory.
    """
    size = min(size, self.memory_size)
    game = self.get_game()
    self.exp_memory.add(game.get_state(), 0, 0, 0)
    for _ in range(size):
        # explore with probability epsilon, otherwise act greedily
        if np.random.rand() < self.epsilon:
            chosen = np.random.randint(0, self.num_actions)
        else:
            chosen = self.qlearner.compute_action(game.get_state())[0]
        reward, done = game.execute_action(chosen)
        self.exp_memory.add(game.get_state(), chosen, reward, done)
        if done:
            game.reset()
            self.exp_memory.add(game.get_state(), 0, 0, 0)
def login():
    """Handle the tenant login form.

    On a valid POST, resolves the tenant from the request subdomain
    (falling back to "asdf" on localhost:5000), loads the stored user
    record, and on a credential match records the login state and sets a
    `login` cookie before redirecting home. Otherwise re-renders the form.
    """
    form = LoginForm(request.form)
    if request.method == 'POST' and form.validate():
        prefix = request.url_root.split('/')[2].split('.')[0]
        if prefix == "localhost:5000":
            prefix = "asdf"
        # strip the trailing ":<prefix>" from the stored key id
        keyid = get_val('corp.cf.zone', ":%s" % prefix, second)[:-(len(prefix) + 1)]
        rec = json.loads(get_state(keyid, 'corp-userdata') or '')
        # SECURITY(review): compares against a plaintext stored password;
        # this should be a salted hash comparison.
        if (form.email.data == rec['email'] and form.password.data == rec['password']):
            add_state(keyid, 'corp-login', '')
            response = make_response(redirect('/'))
            # set_cookie requires a str value — passing the bool True
            # raises TypeError in Werkzeug. '1' is truthy for the
            # cookie check performed by myproducts().
            response.set_cookie('login', '1')
            return response
    return render_template('login.html', form=form)
def find_max_games(self, num_steps, path, score_threshold):
    """Play `num_steps` greedy steps and save frame images for every
    finished game whose score exceeds `score_threshold`.

    Frames are buffered per episode and written via save_image only when
    the threshold is beaten; the best per-game score seen is printed at
    the end. Games that are still unfinished when the step budget runs
    out are not scored.
    """
    image_id = 0
    game = self.get_game()
    frames = [(np.copy(game.get_state()), 0.0)]
    max_game_score = 0
    current_game_score = 0.0
    # max(1, ...) guards the progress interval: num_steps < 10 previously
    # raised ZeroDivisionError here.
    progress_every = max(1, num_steps // 10)
    for i in range(num_steps):
        if i % progress_every == 0:
            print("At step {}".format(i))
        action = self.qlearner.compute_action(game.get_state())[0]
        reward, is_terminal = game.execute_action(action)
        reward = self.renormalize_reward(reward)
        current_game_score += reward
        frames.append((np.copy(game.get_state()), current_game_score))
        if is_terminal:
            game.reset()
            if current_game_score > max_game_score:
                max_game_score = current_game_score
            if current_game_score > score_threshold:
                print("Saving images...")
                for frame in frames:
                    self.save_image(frame[0], path, image_id, 0, 0, 0, score=frame[1])
                    image_id += 1
            # start buffering the next episode from its fresh start state
            frames = [(np.copy(game.get_state()), 0.0)]
            current_game_score = 0.0
    print("Max score: {}".format(max_game_score))
def eval_with_images(self, num_steps, path):
    """Run `num_steps` greedy steps, saving one image per step via
    save_image, and print the best single-game score observed.

    Tracks (but does not return) the total score and the number of games
    finished during the run.
    """
    image_id = 0
    game = self.get_game()
    self.save_image(game.get_state(), path, image_id, 0, 0, 0, 0.0)
    total_score = 0
    games_finished = 0
    max_game_score = 0
    current_game_score = 0.0
    # Pre-initialize so the trailing save_image below cannot raise
    # NameError when num_steps == 0 (previously these were unbound).
    action, reward, is_terminal = 0, 0, 0
    for i in range(num_steps):
        image_id += 1
        action = self.qlearner.compute_action(game.get_state())[0]
        reward, is_terminal = game.execute_action(action)
        reward = self.renormalize_reward(reward)
        total_score += reward
        current_game_score += reward
        self.save_image(game.get_state(), path, image_id, action, reward, is_terminal, score=current_game_score)
        if is_terminal:
            game.reset()
            games_finished += 1
            if current_game_score > max_game_score:
                max_game_score = current_game_score
            current_game_score = 0.0
    # NOTE(review): this re-saves the final state under the same image_id
    # as the last loop iteration, overwriting that file — confirm whether
    # image_id should be incremented first.
    self.save_image(game.get_state(), path, image_id, action, reward, is_terminal, score=current_game_score)
    print("Max score: {}".format(max_game_score))
def doxer(msg, mailid=None, host=None):
    """Process an inbound mail: find a reasonably sized PGP payload,
    decrypt it with the tenant's stored password, and record success or a
    user-visible error in the tenant's state.

    msg: email.message.Message whose parts are walked for the payload.
    mailid: token used to resolve the tenant key id; unknown ids return
        silently.
    host: unused here; presumably kept for the caller's interface.
    """
    try:
        keyid = get_val('dox-mailid', ":%s" % mailid, second)[:-(len(mailid)+1)]
    except TypeError:
        # no such mailid — lookup returned None and slicing failed
        return
    pwd = get_state(keyid, 'prod.pdf.pass')
    if not pwd:
        add_state(keyid, 'prod.pdf.err', "I got a mail, but i was not aware of the password at that time. try to resend it after telling me the password please.")
        return
    err = None
    for mpart in msg.walk():
        part = to_message(mpart)
        if part.get_content_maintype() == 'multipart':
            continue
        # only consider payloads in the expected size window
        size = len(part.get_payload(decode=True))
        if (size < 200000 or size > 310000):
            continue
        hash = hashlib.sha256()
        def hupdate(data):
            # workaround for http://bugs.python.org/issue17481
            hash.update(data)
        ret = gpg('-d', '--passphrase', pwd, _ok_code=[0,2], _in=part.get_payload(decode=True), _out=hupdate)
        # Capture stderr BEFORE the failure branch: previously err was
        # still None on the first part, so the message concatenation
        # below raised TypeError instead of reporting the gpg error.
        # (Reading stderr also flushes the process output.)
        err = str(ret.stderr)
        if ret.exit_code != 0:
            add_state(keyid, 'prod.pdf.err', "got a mail, but gpg had problems, try fixing the problem and resend the mail. gpg said this\n" + err)
            break
        # compare the decrypted payload's digest against the expected one
        if hash.hexdigest() == '658be96015645fe1d646fd167c1ac3bd372360530191d574ace5870c5aeb132f':
            add_state(keyid, 'prod.pdf.done', '1')
            break
        else:
            add_state(keyid, 'prod.pdf.err', "got a mail, but it wasn't quite what i expected, so i dropped it.")
            break
    else:
        # loop completed without finding a candidate payload
        add_state(keyid, 'prod.pdf.err', "got a mail, but there was nothing found that looked like a reasonably sized pgp payload")
def eval(self, num_steps):
    """Run the greedy policy for `num_steps` steps.

    Returns (total_score, num_games, average_score_per_game,
    max_single_game_score). A game that terminates exactly on the final
    step is not counted as starting a new game.
    """
    game = self.get_game()
    total = 0.0
    running = 0.0
    games = 1.0
    best = 0.0
    for step in range(num_steps):
        greedy = self.qlearner.compute_action(game.get_state())[0]
        reward, done = game.execute_action(greedy)
        reward = self.renormalize_reward(reward)
        running += reward
        total += reward
        if done:
            game.reset()
            # only count a follow-up game if there are steps left to play it
            if step < (num_steps - 1):
                games += 1
            if running > best:
                best = running
            running = 0
    return total, games, total / games, best
# Python 2 training script: builds the game, lets the user play once,
# then runs a DQN agent for EPISODES episodes of at most MAX_STEPS each,
# rendering and logging progress on every 100th episode.
# NOTE(review): constants (AREA_WIDTH, EPISODES, ...) and the action2key /
# key2str / action2str tables are defined elsewhere in this file — confirm.
game = game.Game(AREA_WIDTH, AREA_HEIGHT)
user_play(game)
agent = agent.Agent(ACTION_SIZE, DQN_MEMSIZE)
stats = stats.Stats()
score_sum = 0.0
time_sum = 0.0
score_cnt = 0.0
steps_wo_r = 0  # steps without reward (counter, updated elsewhere presumably)
quality_max = 0.0
for e in range(EPISODES):
    game.reset()
    state = game.get_state()
    for t in range(MAX_STEPS):
        action = agent.act(state)
        # map the agent's abstract action to a concrete key, conditioned
        # on the currently held key
        key = action2key[game.key][action]
        # render + log only on every 100th episode
        if int(e / 100) * 100 == e:
            game.render()
            print "key:", key2str[key], " action:", action2str[action], " time:", t
            # rolling average score; +1 avoids division by zero
            quality = score_sum / (score_cnt + 1)
            msg_str = "episode: {}/{}, epsilon: {:.2}, q: {:0.2f}, mem: {}, mem_done: {}, time: {}"\
                .format(e, EPISODES, agent.epsilon, quality, len(agent.memory), len(agent.memory_done), time_sum/100.0)
            print msg_str
            # print "----------------"
            # game.render_dxy_state()
            # print "----------------"
            # slow down rendering so it is watchable
            time.sleep(0.05)
def train(self):
    """Main DQN training loop.

    Seeds the experience memory (epsilon-greedy when resuming from a
    loaded model, uniformly random otherwise), then for up to max_steps:
    plays one epsilon-greedy step, anneals epsilon and the prioritized-
    replay beta, trains on a sampled batch every update_freq steps,
    refreshes the target network, and periodically prints progress,
    evaluates, and saves the model when the average evaluation score
    improves.
    """
    if self.model_loaded:
        self.init_exp_memory(self.exp_memory_start_size)
    else:
        self.init_random_exp_memory(self.exp_memory_start_size)
    total_reward = 0.0
    games_played = 1
    game = self.get_game()
    # sentinel entry marking the start state (matches the init_* helpers)
    self.exp_memory.add(game.get_state(), 0, 0, 0)
    while self.curr_step < self.max_steps:
        #play one game step according to epsilon-greedy policy
        action = 0
        if np.random.rand() < self.epsilon:
            action = np.random.randint(0, self.num_actions)
        else:
            action = self.qlearner.compute_action(game.get_state())[0]
        reward, is_terminal = game.execute_action(action)
        self.exp_memory.add(game.get_state(), action, reward, is_terminal)
        if is_terminal:
            game.reset()
            self.exp_memory.add(game.get_state(), 0, 0, 0)
            games_played += 1
        total_reward += self.renormalize_reward(reward)
        #compute next epsilon (linear anneal down to epsilon_min)
        self.epsilon = np.maximum(self.epsilon_min, self.epsilon - self.epsilon_step)
        # anneal prioritized-replay importance-sampling beta up to its end value
        self.memory_beta = np.minimum(self.memory_beta_end, self.memory_beta + self.memory_beta_step)
        if self.curr_step % self.update_freq == 0:
            #sample a batch of transitions from experience memory
            s, a, r, s2, t, indices, p_values = self.exp_memory.sample(self.batch_size)
            #output tensorboard summaries
            write_summary = False
            if (self.tensorboard_log_freq > 0) and (self.curr_step % self.tensorboard_log_freq == 0):
                write_summary = True
            #beta is divided by 2 here because squared error loss squares beta
            _, _, td = self.qlearner.train_step(s, a, r, s2, t, p_values, self.memory_beta / 2.0, write_summary=write_summary)
            # refresh the priorities of the sampled transitions with their TD errors
            self.exp_memory.update_p(indices, td)
        #update target network ("soft" mode updates every update_freq steps,
        #otherwise a hard copy every target_network_update_freq steps)
        if self.target_network_update_mode == "soft":
            if self.curr_step % self.update_freq == 0:
                self.qlearner.update_target_network()
        else:
            if self.curr_step % self.target_network_update_freq == 0:
                self.qlearner.update_target_network()
        #output current training status
        if self.curr_step % self.output_freq == 0:
            average_reward = total_reward / games_played
            total_reward = 0
            games_played = 1
            print("step: {} epsilon: {} average reward per game: {}".format(self.curr_step, self.epsilon, average_reward))
        #evaluate current target network and save model if average score per game has improved
        if (self.curr_step % self.eval_freq == 0):
            score, num_games, average, max_score = self.eval(self.eval_steps)
            print("Evaluating model with {} steps:".format(self.eval_steps))
            print("Total score: {} Games: {} Average: {} Max: {}".format(score, num_games, average, max_score))
            if average >= self.best_average_score:
                print("Improved average score")
                print("Saving model...")
                self.save()
                self.best_average_score = average
            #add average score to tensorboard
            summary = tf.Summary()
            summary.value.add(tag='average_score', simple_value=average)
            summary.value.add(tag='max_score', simple_value=max_score)
            self.qlearner.summary_writer.add_summary(summary, self.curr_step)
        self.curr_step += 1