コード例 #1
0
    def init_random_exp_memory(self, size):
        """Pre-fill the experience memory by playing uniformly random actions.

        The number of steps played is ``size`` capped at ``self.memory_size``.
        The first state of every episode is stored with action, reward and
        terminal flag all set to 0.

        Args:
            size: Requested number of random steps to play.
        """
        steps = min(size, self.memory_size)

        env = self.get_game()
        # Seed the memory with the initial state of the first episode.
        self.exp_memory.add(env.get_state(), 0, 0, 0)
        for _ in range(steps):
            action = np.random.randint(0, self.num_actions)
            reward, done = env.execute_action(action)
            self.exp_memory.add(env.get_state(), action, reward, done)
            if not done:
                continue
            # Episode over: restart and record the fresh starting state.
            env.reset()
            self.exp_memory.add(env.get_state(), 0, 0, 0)
コード例 #2
0
def play_game(game, fps=30):
    """
    Run the real-time loop for one game and broadcast its state to the game's socketio room.

    Every pass ticks the game while holding ``game.lock`` and then emits either a
    'reset_game' or a 'state_pong' event to the room ``game.id``. Once the game reports
    DONE or INACTIVE, an 'end_game' event is emitted, the game is deactivated (unless it
    is already INACTIVE) and ``cleanup_game`` is called. As the loop blocks until the
    game ends, it must be started from a separate thread for each active game.

    game (Game object):     Holds the game state; its id doubles as the socketio room id
                            for all clients connected to this game
    fps (int):              Number of game ticks that should happen every second
    """
    terminal_statuses = (Game.Status.DONE, Game.Status.INACTIVE)
    status = Game.Status.ACTIVE
    while status not in terminal_statuses:
        with game.lock:
            status = game.tick()

        if status != Game.Status.RESET:
            socketio.emit('state_pong', {"state": game.get_state()}, room=game.id)
        else:
            with game.lock:
                data = game.get_data()
            payload = {"state": game.to_json(), "timeout": game.reset_timeout, "data": data}
            socketio.emit('reset_game', payload, room=game.id)
            # reset_timeout is divided by 1000 before being used as the sleep duration
            socketio.sleep(game.reset_timeout/1000)
        socketio.sleep(1/fps)

    with game.lock:
        final_data = game.get_data()
        socketio.emit('end_game', {"status": status, "data": final_data}, room=game.id)

        if status != Game.Status.INACTIVE:
            game.deactivate()
        cleanup_game(game)
コード例 #3
0
ファイル: webapp.py プロジェクト: rafapolo/privacy-challenge
def myproducts():
    """Render the product page for a logged-in visitor, otherwise the base page.

    The first label of the request host is used as the lookup prefix
    ("localhost:5000" is mapped to "asdf"). The matching key id is resolved
    via ``get_val`` and the stored 'corp-userdata' JSON record supplies the
    ``serno`` shown on the page.

    Returns:
        The rendered 'myproducts.html' template when the 'login' cookie is
        set and a user record exists, else the rendered 'base.html'.
    """
    # .get(): a missing cookie means "not logged in" and must fall through to
    # the base page instead of raising on the key lookup.
    if not request.cookies.get('login'):
        return render_template('base.html')

    prefix = request.url_root.split('/')[2].split('.')[0]
    if prefix == "localhost:5000":
        prefix = "asdf"
    keyid = get_val('corp.cf.zone', ":%s" % prefix, second)[:-(len(prefix)+1)]

    raw = get_state(keyid, 'corp-userdata')
    if not raw:
        # No stored record: json.loads('') would raise, so show the base page.
        return render_template('base.html')
    rec = json.loads(raw)
    return render_template('myproducts.html', serno=rec['serno'])
コード例 #4
0
    def init_exp_memory(self, size):
        """Pre-fill the experience memory by playing epsilon-greedy actions.

        With probability ``self.epsilon`` a uniformly random action is played;
        otherwise the first element returned by
        ``self.qlearner.compute_action`` for the current state is used. The
        number of steps is ``size`` capped at ``self.memory_size``. The first
        state of every episode is stored with action, reward and terminal
        flag all set to 0.

        Args:
            size: Requested number of steps to play.
        """
        steps = min(size, self.memory_size)

        env = self.get_game()
        # Seed the memory with the initial state of the first episode.
        self.exp_memory.add(env.get_state(), 0, 0, 0)
        for _ in range(steps):
            explore = np.random.rand() < self.epsilon
            if explore:
                chosen = np.random.randint(0, self.num_actions)
            else:
                chosen = self.qlearner.compute_action(env.get_state())[0]

            reward, done = env.execute_action(chosen)
            self.exp_memory.add(env.get_state(), chosen, reward, done)
            if not done:
                continue
            # Episode over: restart and record the fresh starting state.
            env.reset()
            self.exp_memory.add(env.get_state(), 0, 0, 0)
コード例 #5
0
ファイル: webapp.py プロジェクト: rafapolo/privacy-challenge
def login():
    """Handle the login form: validate credentials and set the 'login' cookie.

    On a valid POST the first label of the request host is used as the lookup
    prefix ("localhost:5000" is mapped to "asdf"), the matching key id is
    resolved via ``get_val`` and the stored 'corp-userdata' JSON record is
    compared against the submitted e-mail and password. On success a
    'corp-login' state is recorded and the client is redirected to '/' with
    the 'login' cookie set.

    Returns:
        A redirect response on successful login, otherwise the rendered
        'login.html' template with the form.
    """
    form = LoginForm(request.form)
    if request.method == 'POST' and form.validate():
        prefix = request.url_root.split('/')[2].split('.')[0]
        if prefix == "localhost:5000":
            prefix = "asdf"
        keyid = get_val('corp.cf.zone', ":%s" % prefix, second)[:-(len(prefix)+1)]

        # A missing record is a failed login; json.loads('') would raise.
        raw = get_state(keyid, 'corp-userdata')
        rec = json.loads(raw) if raw else None

        # NOTE(review): the stored password is compared in plaintext; left
        # unchanged because the storage format is outside this block.
        if (rec is not None
                and form.email.data == rec['email']
                and form.password.data == rec['password']):
            add_state(keyid, 'corp-login', '')
            response = make_response(redirect('/'))
            # Cookie values must be strings; 'True' stays truthy for readers
            # that only test the cookie for truthiness.
            response.set_cookie('login', 'True')
            return response
    return render_template('login.html', form=form)
コード例 #6
0
    def find_max_games(self, num_steps, path, score_threshold):
        """Play with the learned policy and save the frames of high-scoring games.

        The frames of the game in progress are buffered together with the
        running score. When a game ends with a score above
        ``score_threshold`` every buffered frame is passed to
        ``self.save_image`` with a running image id. The best finished-game
        score is printed at the end.

        Args:
            num_steps: Total number of environment steps to play.
            path: Passed through to ``self.save_image``.
            score_threshold: A finished game must score strictly above this
                value for its frames to be saved.
        """
        image_id = 0
        game = self.get_game()
        frames = [(np.copy(game.get_state()), 0.0)]
        max_game_score = 0
        current_game_score = 0.0
        # Report progress about ten times; clamp to 1 so that num_steps < 10
        # does not end up as a modulo by zero.
        report_every = max(1, num_steps // 10)
        for i in range(num_steps):
            if i % report_every == 0:
                print("At step {}".format(i))
            action = self.qlearner.compute_action(game.get_state())[0]
            reward, is_terminal = game.execute_action(action)
            reward = self.renormalize_reward(reward)
            current_game_score += reward
            frames.append((np.copy(game.get_state()), current_game_score))
            if not is_terminal:
                continue

            game.reset()
            if current_game_score > max_game_score:
                max_game_score = current_game_score

            if current_game_score > score_threshold:
                print("Saving images...")
                for state, score in frames:
                    self.save_image(state,
                                    path,
                                    image_id,
                                    0,
                                    0,
                                    0,
                                    score=score)
                    image_id += 1

            # Start buffering the next game from its initial state.
            frames = [(np.copy(game.get_state()), 0.0)]
            current_game_score = 0.0

        print("Max score: {}".format(max_game_score))
コード例 #7
0
    def eval_with_images(self, num_steps, path):
        """Play with the learned policy and save an image for every step.

        The initial state is saved first. After each step the new state is
        saved together with the action, the renormalized reward, the terminal
        flag and the running score of the current game. When a game ends the
        environment is reset and the fresh state is saved as well. The best
        finished-game score is printed at the end.

        Args:
            num_steps: Number of environment steps to play.
            path: Passed through to ``self.save_image``.
        """
        image_id = 0
        game = self.get_game()
        self.save_image(game.get_state(), path, image_id, 0, 0, 0, 0.0)
        max_game_score = 0
        current_game_score = 0.0
        for _ in range(num_steps):
            image_id += 1
            action = self.qlearner.compute_action(game.get_state())[0]
            reward, is_terminal = game.execute_action(action)
            reward = self.renormalize_reward(reward)
            current_game_score += reward
            self.save_image(game.get_state(),
                            path,
                            image_id,
                            action,
                            reward,
                            is_terminal,
                            score=current_game_score)
            if not is_terminal:
                continue

            game.reset()
            if current_game_score > max_game_score:
                max_game_score = current_game_score
            current_game_score = 0.0
            # NOTE(review): the post-reset state is saved with the same
            # image_id as the terminal frame above; if save_image keys its
            # output on image_id this replaces that frame - confirm intent.
            self.save_image(game.get_state(),
                            path,
                            image_id,
                            action,
                            reward,
                            is_terminal,
                            score=current_game_score)

        print("Max score: {}".format(max_game_score))
コード例 #8
0
ファイル: sample.py プロジェクト: stef/pc-mailer
def doxer(msg, mailid=None, host=None):
    """Check a mail for the expected GPG-encrypted payload and record the result.

    The key id belonging to ``mailid`` is looked up first; unknown mail ids
    are ignored silently. Every non-multipart part whose decoded payload is
    between 200000 and 310000 bytes is a candidate: the first candidate is
    decrypted with gpg using the stored 'prod.pdf.pass' passphrase and the
    SHA-256 of the decrypted output is compared with the expected digest.
    The outcome is stored via ``add_state`` as 'prod.pdf.done' or as a
    'prod.pdf.err' message.

    Args:
        msg: Message object providing ``walk()`` over its parts.
        mailid: Mail id used to resolve the key id.
        host: Unused in this function.
    """
    try:
        keyid = get_val('dox-mailid', ":%s" % mailid, second)[:-(len(mailid)+1)]
    except TypeError:
        return  # no such mailid
    pwd = get_state(keyid, 'prod.pdf.pass')
    if not pwd:
        add_state(keyid, 'prod.pdf.err',"I got a mail, but i was not aware of the password at that time. try to resend it after telling me the password please.")
        return
    for mpart in msg.walk():
        part = to_message(mpart)
        if part.get_content_maintype() == 'multipart':
            continue
        payload = part.get_payload(decode=True)
        # An empty or missing payload is skipped like any other undersized
        # part (len() of a missing payload would raise).
        if not payload:
            continue
        if not 200000 <= len(payload) <= 310000:
            continue
        digest = hashlib.sha256()

        def hupdate(data):  # workaround for http://bugs.python.org/issue17481
            digest.update(data)
        # NOTE(review): the passphrase is handed to gpg as a command-line
        # argument - confirm this is acceptable on the deployment host.
        ret = gpg('-d',
                  '--passphrase', pwd,
                  _ok_code=[0,2],
                  _in=payload,
                  _out=hupdate)
        exit_code = ret.exit_code
        # Read stderr before reporting: the failure message below needs gpg's
        # output (previously it concatenated a still-unset value and raised).
        err = str(ret.stderr)
        if exit_code != 0:
            add_state(keyid, 'prod.pdf.err',"got a mail, but gpg had problems, try fixing the problem and resend the mail. gpg said this\n"+err)
            break
        if digest.hexdigest() == '658be96015645fe1d646fd167c1ac3bd372360530191d574ace5870c5aeb132f':
            add_state(keyid, 'prod.pdf.done','1')
        else:
            add_state(keyid, 'prod.pdf.err',"got a mail, but it wasn't quite what i expected, so i dropped it.")
        # Only the first candidate part is ever examined.
        break
    else:
        # Loop finished without finding any candidate part.
        add_state(keyid, 'prod.pdf.err',"got a mail, but there was nothing found that looked like a reasonably sized pgp payload")
コード例 #9
0
    def eval(self, num_steps):
        """Play ``num_steps`` steps with the learned policy and summarize the scores.

        Rewards are passed through ``self.renormalize_reward`` before being
        accumulated. The game in progress when the steps run out is counted
        as a game, which is why the game counter starts at 1 and is not
        incremented when a game ends exactly on the last step.

        Args:
            num_steps: Number of environment steps to play.

        Returns:
            Tuple ``(total_score, num_games, average, max_score)`` where
            ``average`` is ``total_score / num_games`` and ``max_score`` is
            the best score among the games that finished.
        """
        game = self.get_game()
        total_score = 0.0
        current_score = 0.0
        num_games = 1.0
        max_score = 0.0
        for i in range(num_steps):
            action = self.qlearner.compute_action(game.get_state())[0]
            reward, is_terminal = game.execute_action(action)
            reward = self.renormalize_reward(reward)
            current_score += reward
            total_score += reward
            if not is_terminal:
                continue

            game.reset()
            # Every finished game competes for the maximum, including one
            # that ends on the very last step.
            if current_score > max_score:
                max_score = current_score
            current_score = 0
            # A new game only starts if there are steps left to play.
            if i < (num_steps - 1):
                num_games += 1

        average = total_score / num_games

        return total_score, num_games, average, max_score
コード例 #10
0
    # Fragment of a training driver; the enclosing definition and the rest of
    # the inner loop are not visible here. Uses Python 2 print statements.
    # NOTE(review): `game`, `agent` and `stats` are rebound from what look
    # like same-named modules to instances - confirm the scope allows this.
    game = game.Game(AREA_WIDTH, AREA_HEIGHT)
    user_play(game)

    agent = agent.Agent(ACTION_SIZE, DQN_MEMSIZE)

    stats = stats.Stats()

    # Running totals; only their use in the progress report below is visible
    # in this fragment.
    score_sum = 0.0
    time_sum = 0.0
    score_cnt = 0.0
    steps_wo_r = 0
    quality_max = 0.0

    for e in range(EPISODES):
        game.reset()
        state = game.get_state()
        for t in range(MAX_STEPS):
            action = agent.act(state)
            # Translate the agent's action into a key via game.key.
            key = action2key[game.key][action]
            # True only when e is a multiple of 100: render and report
            # progress for every step of those episodes.
            if int(e / 100) * 100 == e:
                game.render()
                print "key:", key2str[key], "    action:", action2str[
                    action], "   time:", t
                # The +1 keeps the division defined while score_cnt is 0.
                quality = score_sum / (score_cnt + 1)
                msg_str = "episode: {}/{}, epsilon: {:.2}, q: {:0.2f}, mem: {}, mem_done: {}, time: {}"\
                 .format(e, EPISODES, agent.epsilon, quality, len(agent.memory), len(agent.memory_done), time_sum/100.0)
                print msg_str
                #	print "----------------"
                #	game.render_dxy_state()
                #	print "----------------"
                # Pause 50 ms after each rendered step.
                time.sleep(0.05)
コード例 #11
0
    def train(self):
        """Run the training loop until ``self.curr_step`` reaches ``self.max_steps``.

        The experience memory is pre-filled first: via ``init_exp_memory`` when
        a model was loaded, otherwise via ``init_random_exp_memory``. Every
        step then

        * plays one epsilon-greedy action and stores the transition,
        * moves ``epsilon`` towards ``epsilon_min`` and ``memory_beta``
          towards ``memory_beta_end``,
        * trains on a sampled batch every ``update_freq`` steps and feeds the
          returned TD values back into the memory,
        * updates the target network (every ``update_freq`` steps in "soft"
          mode, else every ``target_network_update_freq`` steps),
        * prints a status line every ``output_freq`` steps, and
        * evaluates every ``eval_freq`` steps, saving the model whenever the
          average evaluation score is at least the best one seen so far.
        """
        warmup_size = self.exp_memory_start_size
        if not self.model_loaded:
            self.init_random_exp_memory(warmup_size)
        else:
            self.init_exp_memory(warmup_size)

        reward_sum = 0.0
        game_count = 1

        env = self.get_game()
        self.exp_memory.add(env.get_state(), 0, 0, 0)

        while self.curr_step < self.max_steps:
            # Play one step following the epsilon-greedy policy.
            explore = np.random.rand() < self.epsilon
            if explore:
                action = np.random.randint(0, self.num_actions)
            else:
                action = self.qlearner.compute_action(env.get_state())[0]

            reward, done = env.execute_action(action)
            self.exp_memory.add(env.get_state(), action, reward, done)
            if done:
                env.reset()
                self.exp_memory.add(env.get_state(), 0, 0, 0)
                game_count += 1

            reward_sum += self.renormalize_reward(reward)

            # Schedule epsilon and the memory beta for the next step.
            self.epsilon = np.maximum(self.epsilon_min,
                                      self.epsilon - self.epsilon_step)
            self.memory_beta = np.minimum(
                self.memory_beta_end, self.memory_beta + self.memory_beta_step)

            if self.curr_step % self.update_freq == 0:
                # Draw a batch of transitions from the experience memory.
                batch = self.exp_memory.sample(self.batch_size)
                s, a, r, s2, t, indices, p_values = batch

                # Tensorboard summaries are written only when logging is
                # enabled and due on this step.
                log_summary = bool(
                    (self.tensorboard_log_freq > 0)
                    and (self.curr_step % self.tensorboard_log_freq == 0))

                # Beta is halved here because the squared error loss squares it.
                _, _, td_values = self.qlearner.train_step(
                    s,
                    a,
                    r,
                    s2,
                    t,
                    p_values,
                    self.memory_beta / 2.0,
                    write_summary=log_summary)
                self.exp_memory.update_p(indices, td_values)

            # Decide whether the target network is due for an update.
            if self.target_network_update_mode == "soft":
                sync_due = self.curr_step % self.update_freq == 0
            else:
                sync_due = (
                    self.curr_step % self.target_network_update_freq == 0)
            if sync_due:
                self.qlearner.update_target_network()

            # Report the training status and restart the running averages.
            if self.curr_step % self.output_freq == 0:
                average_reward = reward_sum / game_count
                reward_sum = 0
                game_count = 1
                status_line = (
                    "step: {}  epsilon: {}  average reward per game: {}")
                print(status_line.format(self.curr_step, self.epsilon,
                                         average_reward))

            # Evaluate, and save the model if the average score per game has
            # not dropped below the best one seen so far.
            if self.curr_step % self.eval_freq == 0:
                score, num_games, average, max_score = self.eval(
                    self.eval_steps)
                print("Evaluating model with {} steps:".format(
                    self.eval_steps))
                eval_line = "Total score: {}  Games: {}  Average: {}  Max: {}"
                print(eval_line.format(score, num_games, average, max_score))
                if average >= self.best_average_score:
                    print("Improved average score")
                    print("Saving model...")
                    self.save()
                    self.best_average_score = average
                # Publish the evaluation scores to tensorboard.
                summary = tf.Summary()
                summary.value.add(tag='average_score', simple_value=average)
                summary.value.add(tag='max_score', simple_value=max_score)
                self.qlearner.summary_writer.add_summary(
                    summary, self.curr_step)

            self.curr_step += 1