Example #1
    def __getitem__(self, idx):
        if self.examples:
            item = json.loads(self.examples[idx])
        else:
            result = bisect.bisect_left(self.file_offsets,
                                        (idx, len(self.fnames))) - 1
            offset, fnames_idx = self.file_offsets[result]
            fname = self.fnames[fnames_idx]
            to_go = idx - offset
            for i, item in enumerate(jsons.stream(fname)):
                if i == to_go:
                    break
            else:
                raise ValueError('offset too high: %s, %s, %s' %
                                 (idx, offset, fname))

        if self.transform:
            item = self.transform(item)

        fs = self.policy.extract(item['state'], item['candidates'])
        rv = {
            'actions': np.asarray([item['action']], dtype=config.nt()),
            'advantages': np.asarray([item['advantage']], dtype=config.nt()),
            'value_preds': np.asarray([item['value_pred']], dtype=config.nt()),
            'returns': np.asarray([item['return']], dtype=config.nt()),
        }

        for k in ['probs', 'log_probs']:
            if k in item:
                rv[k] = item[k]

        for k, v in fs.items():
            rv['features_' + k] = v

        return rv
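The bisect lookup above assumes self.file_offsets is a sorted list of (starting row, file index) pairs, one per shard. A minimal sketch of that indexing scheme, with made-up file names and line counts (locate is a hypothetical helper, not part of the dataset class):

    import bisect

    # Hypothetical per-shard row counts; the dataset derives these from its *.jsons.gz files.
    fnames = ['a.jsons.gz', 'b.jsons.gz', 'c.jsons.gz']
    counts = [100, 250, 50]

    # file_offsets[i] = (first global row index served by fnames[i], i)
    file_offsets = []
    offset = 0
    for i, n in enumerate(counts):
        file_offsets.append((offset, i))
        offset += n

    def locate(idx):
        # Same lookup as in __getitem__: the last entry whose offset is <= idx.
        result = bisect.bisect_left(file_offsets, (idx, len(fnames))) - 1
        offset, fnames_idx = file_offsets[result]
        return fnames[fnames_idx], idx - offset

    print(locate(0))    # ('a.jsons.gz', 0)
    print(locate(123))  # ('b.jsons.gz', 23)
    print(locate(399))  # ('c.jsons.gz', 49)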
Example #2
def box(s):
    if not isinstance(s, (list, tuple, np.ndarray)):
        s = (s, )
    arr = np.asarray(s)
    if not np.issubdtype(arr.dtype, np.integer):
        arr = arr.astype(config.nt())
    return arr
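A rough usage sketch, assuming config.nt() resolves to a float dtype such as 'float32' (the project's configured numeric type):

    >>> box(3)
    array([3])                      # integer input keeps its integer dtype
    >>> box(2.5)
    array([2.5], dtype=float32)     # non-integer input is cast to config.nt()
    >>> box([1.0, 2.0])
    array([1., 2.], dtype=float32)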
Example #3
 def nhot(self, names):
   if not isinstance(names, (list, tuple)):
     names = (names,)
   rv = np.zeros(self.size, dtype=config.nt())
   for name in names:
     rv[self.to_index(name)] = 1
   return rv
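For instance, with a hypothetical encoder enc whose size is 4, whose to_index maps 'brn' to 1 and 'psn' to 3, and with config.nt() assumed to be 'float32':

    >>> enc.nhot('brn')
    array([0., 1., 0., 0.], dtype=float32)
    >>> enc.nhot(['brn', 'psn'])
    array([0., 1., 0., 1.], dtype=float32)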
Example #4
    def extract(self, state, candidates):
        rv = extract(state, self.poke_features)
        candidates = list(candidates)

        while len(candidates) < len(parser.all_actions_singles()):
            candidates.append(None)

        rv['mask'] = np.asarray([float(bool(c))
                                 for c in candidates]).astype(config.nt())

        return rv
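Candidates are padded with None up to the full singles action count, so the mask always has a fixed length. A quick sketch of the mask construction (with numpy imported as np; 'float32' stands in for config.nt()):

    >>> candidates = ['move 1', 'move 2', None, None, 'switch 2']
    >>> np.asarray([float(bool(c)) for c in candidates]).astype('float32')
    array([1., 1., 0., 0., 1.], dtype=float32)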
Example #5
    def _convert(self, item):
        if self.transform:
            item = self.transform(item)

        fs = self.policy.extract(item['state'], item['candidates'])
        rv = {
            'actions': np.asarray([item['action']], dtype=config.nt()),
            'advantages': np.asarray([item['advantage']], dtype=config.nt()),
            'value_preds': np.asarray([item['value_pred']], dtype=config.nt()),
            'returns': np.asarray([item['return']], dtype=config.nt()),
        }

        for k in ['probs', 'log_probs']:
            if k in item:
                rv[k] = item[k]

        for k, v in fs.items():
            rv['features_' + k] = v

        return rv
Example #6
    def gen7rb_level(base):
      mbst = F.mbst(base, 100.)
      mbstmin = np.min(mbst)
      level = np.floor(100 * mbstmin / mbst)
      while True:
        mbst = F.mbst(base, level)
        done = np.dtype(bool).type((mbst >= mbstmin) | (level >= 100))
        if np.all(done):
          break
        level += np.dtype(config.nt()).type(~done)

      return level
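The loop starts each level at floor(100 * mbstmin / mbst) and then bumps any level whose mbst is still below mbstmin, stopping once every entry reaches the minimum or level 100. A toy sketch of the same search, with a made-up linear mbst (the real F.mbst formula is not shown here, and the config.nt() cast is dropped for brevity):

    import numpy as np

    # Stand-in for F.mbst: any stat total that grows with level works for the illustration.
    def mbst(base, level):
        return base * level / 100.0

    base = np.array([600., 450., 300.])
    target = np.min(mbst(base, 100.))                    # mbstmin = 300
    level = np.floor(100 * target / mbst(base, 100.))    # [50., 66., 100.]

    while True:
        done = (mbst(base, level) >= target) | (level >= 100)
        if np.all(done):
            break
        level += ~done    # bump only the entries still below the target

    print(level)  # lowest levels whose mbst reaches mbstmin: 50, 67, 100 (capped)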
Example #7
  def act(self, state, candidates):
    move_candidates = []
    switch_candidates = []

    me = nav.get_player_side(state)
    opp = nav.get_opponent_side(state)

    active = nav.get_active_pokes(me)
    active = active and active[0]
    moves = active and [v[0] for v in active['moveTrack']]

    opp_active = nav.get_active_pokes(opp)
    opp_active = opp_active and opp_active[0]

    if not self._type_aware:
      opp_active = None

    for i, action in enumerate(actions.GEN7SINGLES):
      if candidates[i]:
        if action.type == 'move':
          move_key = moves[action.slot - 1]
          power = compute_power(move_key, opp_active)
          move_candidates.append((power, action, i))
        else:  # action.type == 'switch'
          poke = me['pokemon'][action.slot - 1]
          weakness = 0.
          if opp_active:
            for opp_type in opp_active['types']:
              weakness += compute_multiplier(opp_type, poke['types'])
          switch_candidates.append((weakness, action, i))
    
    selected = None
    if move_candidates:
      random.shuffle(move_candidates)
      move_candidates.sort(key=self._move_sort_key, reverse=True)
      selected = move_candidates[0][-1]
    else:
      random.shuffle(switch_candidates)
      switch_candidates.sort(key=self._switch_sort_key)
      selected = switch_candidates[0][-1]

    rv = np.zeros(len(candidates), dtype=config.nt())
    rv[selected] = 1.
    return dict(probs=rv)
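The shuffle before each sort is the standard idiom for random tie-breaking: Python's sort is stable, so candidates with equal keys stay in their shuffled order. Moves are sorted descending (strongest first) and switches ascending (least weakness first). A minimal sketch of the idiom, with made-up keys:

    import random

    # Stable sort after a shuffle: equal keys keep their shuffled, i.e. random, order.
    scored = [(90, 'tackle'), (120, 'thunder'), (90, 'surf')]
    random.shuffle(scored)
    scored.sort(key=lambda t: t[0], reverse=True)
    print(scored[0])  # always the 120 entry; the two 90s follow in random order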
Example #8
File: player.py Project: skishore/metagrok
        def fn():
            state = self.engine.fetch(self.gid, self.request)
            if self.request.get(
                    'teamPreview') and self.candidates == 'teampreview':
                order = [
                    str(i + 1) for i in range(self.request['maxTeamSize'])
                ]
                random.shuffle(order)
                action_string = 'team ' + ','.join(order)
                result = dict(state=state,
                              actionString=action_string,
                              _updates=block_updates)
            else:
                result = self.policy.act(state, self.candidates)
                mask = np.asarray([1. if c else 0. for c in self.candidates])
                if isinstance(result, gevent.event.AsyncResult):
                    result = result.get()
                probs = result['probs']
                probs = (1. - self._epsilon) * probs + (
                    self._epsilon * mask / sum(mask)).astype(config.nt())
                if self._play_best_move:
                    action = np.argmax(probs)
                else:
                    action = np.random.choice(len(self.candidates), p=probs)

                # TODO: why not just use self.candidates?
                if self.request.get('teamPreview'):
                    all_actions = _teampreview_actions
                else:
                    all_actions = _singles_actions

                action_string = all_actions[action]
                result['candidates'] = self.candidates
                result['state'] = state
                result['action'] = action
                result['actionString'] = action_string
                result['_updates'] = block_updates
            self.blocks.append(result)
            rv.set(action_string)
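The probability mixing here blends the policy's output with a uniform distribution over the legal actions, so every legal action keeps at least epsilon / |legal| probability mass. A small numeric sketch of the same formula:

    import numpy as np

    probs = np.array([0.7, 0.3, 0.0, 0.0])    # policy output; illegal action already has 0
    mask = np.array([1., 1., 1., 0.])         # fourth action is illegal
    epsilon = 0.1

    mixed = (1. - epsilon) * probs + epsilon * mask / mask.sum()
    print(mixed)        # ~[0.663, 0.303, 0.033, 0.0]
    print(mixed.sum())  # still 1 (up to float rounding)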
Example #9
 def extract(self, state, candidates):
     mask = np.asarray([float(bool(c))
                        for c in candidates]).astype(config.nt())
     state = np.asarray([state]).astype(config.nt())
     return dict(state=state, mask=mask)
Example #10
def rollup(policy,
           iter_dir,
           gamma,
           lam,
           reward_shaper=None,
           num_workers=0,
           progress_type='bar'):
    # Concatenate rollouts from this iteration, and store in parallel arrays:
    # - Features
    # - Action taken (as an index)
    # - Actual Return
    # - Advantage
    assert progress_type in {'bar', 'log', 'none'}

    if isinstance(iter_dir, six.string_types):
        fnames = list(utils.find(iter_dir, '*.jsons.gz'))
    else:
        assert isinstance(iter_dir, list)
        fnames = iter_dir

    fnames = sorted(fnames)

    logger.info('Rollup has %s files' % len(fnames))
    pool = mulproc.Pool()
    linecount = dict(zip(fnames, pool.map(utils.linecount, fnames)))
    pool.close()

    start_rows = {}
    nrows = 0
    for fname in fnames:
        start_rows[fname] = nrows
        nrows += (linecount[fname] - 1)

    logger.info('Rollup has %s rows' % nrows)

    # read the first file, to see what the sizes are
    t = battlelogs.parse(fnames[0],
                         gamma=gamma,
                         lam=lam,
                         reward_shaper=reward_shaper)[-1]
    fs = policy.extract(t['state'], t['candidates'])
    n_actions = 0
    if 'mask' in fs:
        n_actions = fs['mask'].shape[0]

    type_info = {
        'actions': ((nrows, ), 'int64'),
        'advantages': ((nrows, ), config.nt()),
        'returns': ((nrows, ), config.nt()),
        'value_preds': ((nrows, ), config.nt()),
    }

    for k in ['probs', 'log_probs']:
        if k in t:
            na = max(t[k].shape[0], n_actions)
            type_info[k] = ((nrows, na), config.nt())

    for k, v in fs.items():
        type_info['features_' + k] = ((nrows, ) + v.shape, v.dtype)

    if num_workers > 0:
        mk_buf_fn = _mk_RawArray
        queue_mod = mulproc
    else:
        mk_buf_fn = _mk_volatile_buffer
        queue_mod = queue

    underlying = {}
    data = {}
    for k, (shape, dtype) in type_info.items():
        size = six.moves.reduce(lambda x, y: x * y, shape, 1)
        u = underlying[k] = mk_buf_fn(shape, dtype)
        d = np.frombuffer(u, dtype=dtype, count=size)
        d.shape = shape
        data[k] = d

    in_queue = queue_mod.Queue()
    for kv in start_rows.items():
        in_queue.put(kv)
    out_queue = queue_mod.Queue()

    kwargs = dict(
        type_info=type_info,
        underlying=underlying,
        policy_pkl=policy.pkl(),
        gamma=gamma,
        lam=lam,
        reward_shaper=reward_shaper,
        in_queue=in_queue,
        out_queue=out_queue,
    )

    # Read the rest of the files
    if num_workers == 0:
        in_queue.put(None)
        worker = threading.Thread(target=_worker_loop, kwargs=kwargs)
        worker.daemon = True
        worker.start()
        workers = [worker]
    else:
        workers = [
            mulproc.Process(target=_worker_loop, kwargs=kwargs)
            for _ in six.moves.range(num_workers)
        ]
        for worker in workers:
            worker.daemon = True
            in_queue.put(None)
            worker.start()

    pbar = fnames
    total = len(fnames)
    if progress_type == 'bar':
        pbar = tqdm.tqdm(pbar)

    for i, _ in enumerate(pbar):
        fname = out_queue.get()
        if isinstance(fname, Exception):
            raise fname
        if progress_type == 'bar':
            pbar.set_description(fname)
        elif progress_type == 'log':
            current_pct = int(100 * (i + 1) / total)
            prev_pct = int(100 * i / total)
            if current_pct > prev_pct:
                logger.info('Rolled up: [%d/%d (%d%%)] %s', i + 1, total,
                            current_pct, fname)

    for worker in workers:
        worker.join()

    return data
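The underlying buffers are exposed to numpy with np.frombuffer and an in-place reshape, so worker processes can write rows directly into shared memory without copying. A minimal sketch of that wrapping; _mk_RawArray and _mk_volatile_buffer are the project's own helpers, so a plain multiprocessing RawArray stands in here:

    import numpy as np
    from functools import reduce
    from multiprocessing.sharedctypes import RawArray

    shape, dtype = (3, 4), 'float32'
    size = reduce(lambda x, y: x * y, shape, 1)

    buf = RawArray('f', size)                  # shared, lock-free C float buffer
    view = np.frombuffer(buf, dtype=dtype, count=size)
    view.shape = shape                         # same reshape trick as in rollup()

    view[1, 2] = 7.5                           # writes land in the shared buffer
    print(np.frombuffer(buf, dtype=dtype)[6])  # 7.5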
Example #11
 def extract(self, state, candidates):
   pp = (state == +1).flatten().astype(config.nt())
   pm = (state == -1).flatten().astype(config.nt())
   return dict(pp=pp, pm=pm, mask=np.asarray(candidates).astype(config.nt()))
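Here the state is assumed to be a small integer board with +1 for the agent's pieces and -1 for the opponent's; the extractor splits it into two flattened indicator planes. For a 3x3 board (with numpy imported as np; 'float32' stands in for config.nt()):

    >>> state = np.array([[+1,  0, -1],
    ...                   [ 0, +1,  0],
    ...                   [-1,  0,  0]])
    >>> (state == +1).flatten().astype('float32')
    array([1., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)
    >>> (state == -1).flatten().astype('float32')
    array([0., 0., 1., 0., 0., 0., 1., 0., 0.], dtype=float32)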
Example #12
def zeros(*args, **kwargs):
    cuda = kwargs.get('cuda')
    return _ctor(config.nt(), cuda)(*args).fill_(0)
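_ctor is project-internal; presumably it maps the configured numeric type (and the cuda flag) to a torch tensor constructor. A rough stand-in, assuming config.nt() of 'float32' corresponds to torch.FloatTensor (torch.cuda.FloatTensor when cuda is set):

    import torch

    def _ctor(nt, cuda=False):
        # Hypothetical mapping; the project's _ctor dispatches on its own dtype config.
        if nt == 'float32':
            return torch.cuda.FloatTensor if cuda else torch.FloatTensor
        raise ValueError(nt)

    x = _ctor('float32')(2, 3).fill_(0)
    print(x.shape)  # torch.Size([2, 3]), all zeros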
Example #13
 def gen7_base_stats():
   df = pd.read_csv(os.path.join(self._dirname, 'base-stats.tsv'), sep='\t')
   del df['Pokemon']
   return df.astype(config.nt())
Example #14
 def extract(self, state, candidates):
     rv = {}
     rv['mask'] = np.asarray([float(bool(c))
                              for c in candidates]).astype(config.nt())
     return rv
Example #15
    def test_gradient_step_direction(self):
        'Test that good actions are boosted and bad actions are dampened'
        policy = Policy().type(config.tt())

        updater = PPOUpdater(
            policy=policy,
            opt_lr=1e-1,
            num_epochs=1,
            vbatch_size=2,
            clip_param=0.1,
        )

        # Fake a training example
        policy = policy.eval()
        state = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=int)
        mask = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1])

        results = policy.act(state, mask)

        features = policy.extract(state, mask)
        old_probs = results['probs']
        old_log_probs = results['log_probs']

        features = {
            k: np.repeat(np.expand_dims(v, axis=0), 2, axis=0)
            for k, v in features.items()
        }

        advantages = np.array([-1., +1.], dtype=config.nt())
        log_probs = np.repeat(np.expand_dims(old_log_probs, axis=0), 2, axis=0)
        actions = np.array([4, 2], dtype='int64')

        extras = dict(
            advantages=advantages,
            log_probs=log_probs,
            actions=actions,
            value_preds=np.zeros(2, dtype=config.nt()),
            returns=np.zeros(2, dtype=config.nt()),
        )
        for k, v in features.items():
            extras['features_' + k] = v
        learner.post_prepare(extras)
        extras['advantages'] = np.array([-1., +1.], dtype=config.nt())
        extras = TTensorDictDataset(
            {k: torch.from_numpy(v)
             for k, v in extras.items()})

        updater.update(extras)

        policy = policy.eval()
        results = policy.act(state, mask)
        new_probs = results['probs']
        new_log_probs = results['log_probs']

        # print new_probs - old_probs
        # print
        # print new_log_probs - old_log_probs

        self.assertTrue(np.allclose(np.log(old_probs), old_log_probs))
        self.assertTrue(np.allclose(np.log(new_probs), new_log_probs))
        self.assertGreater(new_probs[2], old_probs[2])
        self.assertLess(new_probs[4], old_probs[4])
Example #16
def convert(replay):
    '''Returns two sequences of Blocks, one for p1 and one for p2.
  Each Block (that is not the last) contains:
    - _updates: The sequence of log messages that the user sees on that turn
    - request: The request object that is associated with this block.
    - state: The current state.
    - action: an integer in [0, N) (now N = 10, soon N = 10 + 12 for mega/zmove/ultra)
      representing the action that was actually taken
    - candidates: a list of N action names (directly sent to PS)
      - the name in the slot will be None if action is not allowed
  The last block contains the winner of the battle, and the logs leading up to it.

  The two sequences of blocks may not be of equal length; there are some turns where only
  one player is required to make a decision.
  '''
    raise ValueError('Deprecated')
    blocks = replay['blocks']

    out_blocks = dict(
        p1=dict(name=replay['p1'][0], blocks=[{
            '_updates': []
        }]),
        p2=dict(name=replay['p2'][0], blocks=[{
            '_updates': []
        }]),
    )

    engine.start('p1')
    engine.start('p2')

    for i in range(len(blocks) - 1):
        cur = blocks[i]
        nex = blocks[i + 1]

        _1, _2, p1_action, p2_action = nex['choice'].split('|')

        actions = dict(p1=p1_action, p2=p2_action)

        for p in ['p1', 'p2']:
            request, updates = extract(cur[p])
            #candidates = parser.parse_valid_actions(request, replay_names = True)
            candidates = parser.parse_valid_actions(request)

            for update in updates:
                engine.update(p, update)

            blks = out_blocks[p]['blocks']
            blk = blks[-1]
            blk['_updates'].extend(updates)

            if actions[p]:
                blk['candidates'] = candidates
                blk['action'] = candidates.index(actions[p])
                blk['request'] = request
                blk['state'] = engine.fetch(p, request)
                blk['probs'] = np.zeros(len(candidates), dtype=config.nt())
                blk['probs'][blk['action']] = 1.0

                blks.append({'_updates': []})

    last = blocks[-1]['logs']
    assert last[-1].startswith('|win|')
    _1, _2, winner = last[-1].split('|')

    for p in ['p1', 'p2']:
        blk = out_blocks[p]['blocks'][-1]
        blk['_updates'].extend(
            [l for l in blocks[-1][p] if not l.startswith('|request|')])
        blk['result'] = 'winner' if winner == out_blocks[p]['name'] else 'loser'

    engine.stop('p1')
    engine.stop('p2')

    return out_blocks['p1']['blocks'], out_blocks['p2']['blocks']