Code example #1
def gumbel_softmax(logits, dim=-1, tau=1, hard=False, eps=1e-10):
    """
    Sample from the Gumbel-Softmax distribution and optionally discretize.
    Args:
      logits: [batch_size, n_class] unnormalized log-probs
      dim: along which dim the softmax is performed
      tau: non-negative scalar temperature
      hard: if True, take argmax, but differentiate w.r.t. soft sample y
      eps: small constant for numerical stability
    Returns:
      [batch_size, n_class] sample from the Gumbel-Softmax distribution.
      If hard=True, then the returned sample will be one-hot, otherwise it will
      be a probability distribution that sums to 1 across classes
    Constraints:
    - this implementation only works on batch_size x num_features tensor for now
    based on
    https://github.com/ericjang/gumbel-softmax/blob/3c8584924603869e90ca74ac20a6a03d99a91ef9/Categorical%20VAE.ipynb ,
    (MIT license)
    """
    y_soft = _gumbel_softmax_sample(logits, dim=dim, tau=tau, eps=eps)
    if hard:
        _, k = y_soft.data.max(dim=dim)
        # this bit is based on
        # https://discuss.pytorch.org/t/stop-gradients-for-st-gumbel-softmax/530/5
        y_hard = torch.zeros_like(as_tensor(logits))
        set_index_one_hot_(y_hard, dim, k, 1.0)
        # this cool bit of code achieves two things:
        # - makes the output value exactly one-hot (since we add then
        #   subtract y_soft value)
        # - makes the gradient equal to y_soft gradient (since we strip
        #   all other gradients)
        y = var_with(y_hard - as_tensor(y_soft), y_soft) + y_soft
    else:
        y = y_soft
    return y
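
The straight-through trick above (add the hard one-hot sample, subtract the detached soft sample) can be reproduced with plain PyTorch. A minimal sketch, assuming only standard torch and none of the Jacinle helpers (var_with, set_index_one_hot_); gumbel_softmax_st is a hypothetical name:

import torch
import torch.nn.functional as F

def gumbel_softmax_st(logits, tau=1.0, hard=False, dim=-1, eps=1e-10):
    # Sample Gumbel(0, 1) noise and form the relaxed (soft) sample.
    u = torch.rand_like(logits)
    gumbel = -torch.log(-torch.log(u + eps) + eps)
    y_soft = F.softmax((logits + gumbel) / tau, dim=dim)
    if not hard:
        return y_soft
    # Straight-through: the forward value is exactly one-hot,
    # the gradient is that of y_soft.
    index = y_soft.argmax(dim=dim, keepdim=True)
    y_hard = torch.zeros_like(logits).scatter_(dim, index, 1.0)
    return (y_hard - y_soft).detach() + y_soft

logits = torch.randn(4, 10, requires_grad=True)
sample = gumbel_softmax_st(logits, tau=0.5, hard=True)  # one-hot forward, soft gradient
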
Code example #2
def instance_accuracy_nqueens(label,
                              raw_pred,
                              return_float=True,
                              feed_dict=None,
                              args=None):
    """get instance-wise accuracy for structured prediction task instead of pointwise task"""

    pred = as_tensor(raw_pred)
    pred = (pred > 0.5).float()

    label = as_tensor(label).float()
    diff = torch.abs(label - pred)
    point_acc = 1 - torch.sum(diff) / label.numel()
    incorrect_count = torch.sum(diff, dim=1)
    incorrect = len(torch.nonzero(incorrect_count))

    in_acc = 1 - incorrect / len(label)

    errors = []
    reward = []
    corrected_acc = 0
    # assumed initialization: per-instance accuracy indicator, indexed below but
    # not defined elsewhere in this snippet
    acc_vector = torch.zeros(len(pred))
    for i, x in enumerate(pred):
        if match_query(feed_dict["query"][i][:, 0].float(),
                       x) and is_safe_nqueens(x):
            corrected_acc += 1
            acc_vector[i] = 1.0
        else:
            errors.append(feed_dict["count"][i].item())

        diff = torch.sum(torch.abs(feed_dict["target_set"][i].float() - x),
                         dim=1)
        reward.append(diff)

    corrected_acc /= len(pred)

    reward = torch.stack(reward)
    if args is not None and args.rl_reward == 'count':
        reward = -1 * reward.float()
    else:
        # assumed intent: cap each per-target error count at 1, as torch.clamp_max
        # does in the futoshiki/sudoku variants
        reward = -1 * torch.clamp(reward, max=1).float()

    # no auxiliary classifier in this variant; report a zero placeholder so the
    # returned keys match the futoshiki version
    classification_acc = torch.zeros(1)

    if return_float:
        return {
            "accuracy": in_acc,
            "corrected accuracy": corrected_acc,
            "pointwise accuracy": point_acc.item(),
            "classification accuracy": classification_acc.item()
        }, errors, reward
    return {
        "accuracy": torch.tensor(in_acc),
        "corrected accuracy": torch.tensor(corrected_acc),
        "pointwise accuracy": point_acc,
        "classification accuracy": classification_acc
    }, errors, reward
Code example #3
def make_data(traj, gamma):
    """Aggregate data as a batch for RL optimization."""
    q = 0
    discount_rewards = []
    for reward in traj['rewards'][::-1]:
        q = q * gamma + reward
        discount_rewards.append(q)
    discount_rewards.reverse()

    traj['states'] = as_tensor(np.array(traj['states']))
    traj['actions'] = as_tensor(np.array(traj['actions']))
    traj['discount_rewards'] = as_tensor(np.array(discount_rewards)).float()
    return traj
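
The reverse scan above computes discounted returns, Q_t = r_t + gamma * Q_{t+1}. A tiny standalone check, with a hypothetical three-step reward list:

import numpy as np

rewards = [0.0, 0.0, 1.0]   # hypothetical trajectory rewards
gamma = 0.9

q, discount_rewards = 0.0, []
for r in rewards[::-1]:
    q = q * gamma + r
    discount_rewards.append(q)
discount_rewards.reverse()

print(discount_rewards)     # [0.81, 0.9, 1.0]
assert np.allclose(discount_rewards, [gamma ** 2, gamma, 1.0])
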
Code example #4
def make_data(traj, gamma):
    """Aggregate data as a batch for RL optimization (path-task variant)."""
    Q = 0
    discount_rewards = []
    for reward in traj['rewards'][::-1]:
        Q = Q * gamma + reward
        discount_rewards.append(Q)
    discount_rewards.reverse()

    traj['states'] = as_tensor(np.array(traj['states']))
    if args.is_path_task:
        traj['relations'] = as_tensor(np.array(traj['relations']))
    traj['actions'] = as_tensor(np.array(traj['actions']))
    traj['discount_rewards'] = as_tensor(np.array(discount_rewards)).float()
    return traj
Code example #5
def instance_accuracy_futoshiki(label,
                                raw_pred,
                                return_float=True,
                                feed_dict=None,
                                pred_aux=None):
    """get instance-wise accuracy for structured prediction task instead of pointwise task"""
    pred = as_tensor(raw_pred)
    pred = (pred > 0.5).float()

    label = as_tensor(label).float()
    diff = torch.abs(label - pred)
    point_acc = 1 - torch.sum(diff) / label.numel()
    incorrect_count = torch.sum(diff, dim=1)
    incorrect = len(torch.nonzero(incorrect_count))

    in_acc = 1 - incorrect / len(label)

    errors = []
    corrected_acc = 0
    for i, x in enumerate(pred):
        constraints = feed_dict["query"][i][:, 1:]
        if is_safe_futoshiki(x, constraints):
            corrected_acc += 1
        else:
            errors.append(feed_dict["count"][i].item())
    corrected_acc /= len(pred)

    if pred_aux is not None:
        pred_aux = (pred_aux > 0.5).float()
        classification_acc = 1 - \
            torch.sum(
                torch.abs(pred_aux-feed_dict["is_ambiguous"].float()))/len(pred_aux)
    else:
        classification_acc = torch.zeros(1)

    if return_float:
        return {
            "accuracy": in_acc,
            "corrected accuracy": corrected_acc,
            "pointwise accuracy": point_acc.item(),
            "classification accuracy": classification_acc.item()
        }, errors
    return {
        "accuracy": torch.tensor(in_acc),
        "corrected accuracy": torch.tensor(corrected_acc),
        "pointwise accuracy": point_acc,
        "classification accuracy": classification_acc
    }, errors
Code example #6
    def step(self,
             feed_dict,
             reduce_func=default_reduce_func,
             cast_tensor=False,
             measure_time=False):
        if hasattr(self.model, 'train_step'):
            return self.model.train_step(self.optimizer, feed_dict)

        assert self._model.training, 'Step an evaluation-mode model.'
        extra = dict()

        self.trigger_event('step:before', self)

        if cast_tensor:
            feed_dict = as_tensor(feed_dict)

        if measure_time:
            end_time = cuda_time()

        self.trigger_event('forward:before', self, feed_dict)
        loss, monitors, output_dict = self._model(feed_dict)
        self.trigger_event('forward:after', self, feed_dict, loss, monitors,
                           output_dict)

        if measure_time:
            extra['time/forward'] = cuda_time() - end_time
            end_time = cuda_time(False)

        loss = reduce_func('loss', loss)
        monitors = {k: reduce_func(k, v) for k, v in monitors.items()}

        loss_f = as_float(loss)
        monitors_f = as_float(monitors)

        if measure_time:
            extra['time/loss'] = cuda_time() - end_time
            end_time = cuda_time(False)

        self._optimizer.zero_grad()
        self.trigger_event('backward:before', self, feed_dict, loss, monitors,
                           output_dict)
        if loss.requires_grad:
            loss.backward()

        if measure_time:
            extra['time/backward'] = cuda_time() - end_time
            end_time = cuda_time(False)

        self.trigger_event('backward:after', self, feed_dict, loss, monitors,
                           output_dict)
        if loss.requires_grad:
            self._optimizer.step()

        if measure_time:
            extra['time/optimize'] = cuda_time() - end_time
            end_time = cuda_time(False)

        self.trigger_event('step:after', self)

        return loss_f, monitors_f, output_dict, extra
Code example #7
File: env.py Project: vacancy/Jacinle
    def step(self, feed_dict, grad_clip=0., reduce_func=default_reduce_func, cast_tensor=False, measure_time=False):
        if hasattr(self.model, 'train_step'):
            try:
                return self.model.train_step(
                    self.optimizer, feed_dict,
                    grad_clip=grad_clip, reduce_func=reduce_func, cast_tensor=False
                )
            except NotImplementedError:
                pass

        extra = dict()

        self.prepare()

        if measure_time:
            end_time = cuda_time()

        if cast_tensor:
            feed_dict = as_tensor(feed_dict)

        self.trigger_event('forward:before', self, feed_dict)
        loss, monitors, output_dict = self._model(feed_dict)
        self.trigger_event('forward:after', self, feed_dict, loss, monitors, output_dict)

        if measure_time:
            extra['time/forward'] = cuda_time() - end_time
            end_time = cuda_time(False)

        return self.update(feed_dict, loss, monitors, output_dict, grad_clip=grad_clip, reduce_func=reduce_func, measure_time=measure_time, extra=extra)
Code example #8
    def evaluate(self, feed_dict, cast_tensor=False):
        assert not self._model.training, 'Evaluating a training-mode model.'
        begin = time.time()
        if cast_tensor:
            feed_dict = as_tensor(feed_dict)
        with torch.no_grad():
            output_dict = self._model(feed_dict)
        end = time.time()

        return output_dict, dict(gpu_time=end - begin)
Code example #9
 def validate_step(self, feed_dict, metric, meters=None):
     feed_dict_np = as_numpy(feed_dict)
     feed_dict = as_tensor(feed_dict)
     with torch.no_grad():
         output_dict = self._model(feed_dict)
     output_dict_np = as_numpy(output_dict)
     result = as_float(metric(feed_dict_np, output_dict_np))
     if meters is not None:
         meters.update(result)
     return result
Code example #10
def make_data(traj, gamma):
    """Aggregate data as a batch for RL optimization."""
    q = 0
    discount_rewards = []
    for reward in traj['rewards'][::-1]:
        q = q * gamma + reward
        discount_rewards.append(q)
    discount_rewards.reverse()

    if type(traj['states'][0]) is list:
        f1 = [f[0] for f in traj['states']]
        f2 = [f[1] for f in traj['states']]
        traj['states'] = [torch.cat(f1, dim=0), torch.cat(f2, dim=0)]
    else:
        traj['states'] = as_tensor(np.array(traj['states']))

    traj['actions'] = as_tensor(np.array(traj['actions']))
    traj['discount_rewards'] = as_tensor(np.array(discount_rewards)).float()
    return traj
Code example #11
    def zero_state(self, input):
        batch_dim = 0 if self.batch_first else 1
        batch_size = input.size(batch_dim)
        hidden_size = self.rnn.hidden_size
        nr_layers = self.rnn.num_layers * (int(self.rnn.bidirectional) + 1)
        state_shape = (nr_layers, batch_size, self.rnn.hidden_size)

        storage = as_tensor(input)
        gen = lambda: torch.zeros(*state_shape, device=input.device)
        if self.state_is_tuple:
            return (gen(), gen())
        return gen()
Code example #12
def _gumbel_softmax_sample(logits, dim=-1, tau=1, eps=1e-10):
    """
    Draw a sample from the Gumbel-Softmax distribution
    based on
    https://github.com/ericjang/gumbel-softmax/blob/3c8584924603869e90ca74ac20a6a03d99a91ef9/Categorical%20VAE.ipynb
    (MIT license)
    """
    gumbel_noise = _sample_gumbel(logits.size(),
                                  eps=eps,
                                  out=as_tensor(logits).new())
    y = logits + var_with(gumbel_noise, logits)
    return F.softmax(y / tau, dim=dim)
Code example #13
    def _get_result_given_player(self, index, meters, number, player, mode):
        assert mode in ['train', 'test', 'mining', 'inherit']
        params = dict(eval_only=True,
                      number=number,
                      play_name='{}_epoch{}_episode{}'.format(
                          mode, self.current_epoch, index))
        backup = None
        if mode == 'train':
            params['eval_only'] = False
            params['dataset'] = self.valid_action_dataset
            params['entropy_beta'] = self.entropy_beta
            meters.update(lr=self.lr, entropy_beta=self.entropy_beta)
        elif mode == 'test':
            params['dump'] = True
            params['use_argmax'] = True
        else:
            backup = copy.deepcopy(player)
            params['use_argmax'] = self.is_candidate

        succ, score, traj, length = run_episode(player, self.model, **params)
        meters.update(number=number, succ=succ, score=score, length=length)

        if mode == 'train':
            feed_dict = make_data(traj, args.gamma)
            feed_dict['entropy_beta'] = as_tensor(self.entropy_beta).float()

            # content from valid_move dataset
            states, actions, labels = \
                self.valid_action_dataset.sample_batch(args.batch_size)
            feed_dict['pred_states'] = as_tensor(states)
            feed_dict['pred_actions'] = as_tensor(actions)
            feed_dict['valid'] = as_tensor(labels).float()
            if args.use_gpu:
                feed_dict = as_cuda(feed_dict)
            return feed_dict
        else:
            message = ('> {} iter={iter}, number={number}, succ={succ}, '
                       'score={score:.4f}, length={length}').format(
                           mode, iter=index, **meters.val)
            return message, dict(succ=succ, number=number, backup=backup)
Code example #14
    def step(self, closure=None):
        loss = None
        if closure is not None:
            loss = closure()

        self._current += 1

        for group in self._base_optimizer.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                #source gradient
                d_p = p.grad.data
                param_state = self._base_optimizer.state[p]

                # MJY:: we ensure that grad_buffer does not require grad.
                if 'grad_buffer' not in param_state:
                    buf = param_state['grad_buffer'] = []
                else:
                    buf = param_state['grad_buffer']
                    # MZ: cannot simply add because the batch sizes differ
                    #buf.add_(d_p)
                buf.append(d_p.clone())

                #MZ: FIX
                if 'exp_avg' not in param_state:
                    self._base_optimizer.state[p][
                        'exp_avg'] = torch.zeros_like(
                            p, memory_format=torch.preserve_format)
                    self._base_optimizer.state[p][
                        'exp_avg_sq'] = torch.zeros_like(
                            p, memory_format=torch.preserve_format)
                    self._base_optimizer.state[p]['step'] = 0

                if self._current >= self._nr_acc:
                    assert len(self.batch_sizes) == self._current
                    #buf.mul_(1. / self._current)
                    r = (torch.stack(buf, -1) * as_tensor(
                        np.array(self.batch_sizes) / sum(self.batch_sizes)).to(
                            buf[0].device)).sum(-1)
                    p.grad.data.copy_(r)
                    #buf.zero_()
                    buf.clear()

        if self._current >= self._nr_acc:
            self._base_optimizer.step()
            self._current = 0
            self.batch_sizes.clear()

        return loss
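
The accumulation step above combines the buffered per-micro-batch gradients, weighted by each micro-batch's size, before the base optimizer takes a step. A self-contained sketch of just that weighted reduction, with hypothetical gradients and batch sizes:

import torch

# Hypothetical gradients for one parameter from two micro-batches, and their sizes.
grad_buffer = [torch.full((3,), 1.0), torch.full((3,), 3.0)]
batch_sizes = [8, 24]

weights = torch.tensor(batch_sizes, dtype=torch.float32)
weights = weights / weights.sum()            # [0.25, 0.75]

# Same reduction as in step(): stack along a new last dim, then weight and sum.
stacked = torch.stack(grad_buffer, dim=-1)   # shape (3, 2)
combined = (stacked * weights).sum(dim=-1)   # shape (3,)
print(combined)                              # tensor([2.5000, 2.5000, 2.5000])
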
Code example #15
    def train_step(self, feed_dict, meters=None):
        assert self._model.training
        feed_dict = as_tensor(feed_dict)

        self._optimizer.zero_grad()
        loss, monitors, output_dict = self._model(feed_dict)
        loss.backward()
        self._optimizer.step()

        loss, monitors = map(as_float, [loss, monitors])
        if meters is not None:
            meters.update(loss=loss)
            meters.update(monitors)

        return as_float(loss)
Code example #16
File: thutils.py Project: matthieu637/DLM
def binary_accuracy(label, raw_pred, eps=1e-20, return_float=True):
    """get accuracy for binary classification problem."""
    pred = as_tensor(raw_pred).squeeze(-1)
    pred = (pred > 0.5).float()
    label = as_tensor(label).float()
    # The $acc is micro accuracy = the correct ones / total
    acc = label.eq(pred).float()

    # The $balanced_accuracy is macro accuracy, with class-wide balance.
    nr_total = torch.ones(label.size(), dtype=label.dtype,
                          device=label.device).sum(dim=-1)
    nr_pos = label.sum(dim=-1)
    nr_neg = nr_total - nr_pos
    pos_cnt = (acc * label).sum(dim=-1)
    neg_cnt = acc.sum(dim=-1) - pos_cnt
    balanced_acc = ((pos_cnt + eps) / (nr_pos + eps) + (neg_cnt + eps) /
                    (nr_neg + eps)) / 2.0

    # $sat means the saturation rate of the prediction,
    # i.e. how close the predictions are to 0 or 1.
    sat = 1 - (raw_pred - pred).abs()
    if return_float:
        acc = as_float(acc.mean())
        balanced_acc = as_float(balanced_acc.mean())
        sat_mean = as_float(sat.mean())
        sat_min = as_float(sat.min())
    else:
        sat_mean = sat.mean(dim=-1)
        sat_min = sat.min(dim=-1)[0]

    return {
        'accuracy': acc,
        'balanced_accuracy': balanced_acc,
        'saturation/mean': sat_mean,
        'saturation/min': sat_min,
    }
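
A quick numeric check of the micro vs. class-balanced (macro) accuracy computed above, using a hypothetical label/prediction pair and omitting the eps smoothing:

import torch

label = torch.tensor([[1., 1., 0., 0., 0., 0.]])   # 2 positives, 4 negatives
pred = torch.tensor([[1., 0., 0., 0., 0., 0.]])    # one positive missed

acc = label.eq(pred).float()
pos_recall = (acc * label).sum(dim=-1) / label.sum(dim=-1)              # 1/2
neg_recall = (acc * (1 - label)).sum(dim=-1) / (1 - label).sum(dim=-1)  # 4/4

print(acc.mean().item())                       # micro accuracy: 5/6 ≈ 0.833
print(((pos_recall + neg_recall) / 2).item())  # balanced accuracy: 0.75
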
Code example #17
    def _get_result_given_player(self, index, meters, number, player, mode):
        assert mode in ['train', 'test', 'mining', 'inherit']
        params = dict(eval_only=True,
                      number=number,
                      play_name='{}_epoch{}_episode{}'.format(
                          mode, self.current_epoch, index))
        backup = None
        if mode == 'train':
            params['eval_only'] = False
            params['entropy_beta'] = self.entropy_beta
            meters.update(lr=self.lr, entropy_beta=self.entropy_beta)
        elif mode == 'test':
            params['dump'] = True
            params['use_argmax'] = True
        else:
            backup = copy.deepcopy(player)
            params['use_argmax'] = self.is_candidate
        succ, score, traj, length, optimal = \
            run_episode(player, self.model, **params)
        meters.update(number=number,
                      succ=succ,
                      score=score,
                      length=length,
                      optimal=optimal)

        if mode == 'train':
            feed_dict = make_data(traj, args.gamma)
            feed_dict['entropy_beta'] = as_tensor(self.entropy_beta).float()

            if args.use_gpu:
                feed_dict = as_cuda(feed_dict)
            return feed_dict
        else:
            message = '> {} iter={iter}, number={number}, succ={succ}, \
score={score:.4f}, length={length}, optimal={optimal}'.format(mode,
                                                              iter=index,
                                                              **meters.val)
            return message, dict(succ=succ, number=number, backup=backup)
Code example #18
 def _inference_model(self, feed_dict):
     feed_dict = as_tensor(feed_dict)
     with torch.no_grad():
         return as_numpy(self._model(feed_dict))
Code example #19
def run_episode(env,
                model,
                number,
                play_name='',
                dump=False,
                eval_only=False,
                use_argmax=False,
                need_restart=False,
                entropy_beta=0.0):
    """Run one episode using the model with $number nodes/numbers."""
    is_over = False
    traj = collections.defaultdict(list)
    score = 0
    moves = []
    # If dump_play=True, store the states and actions in a json file
    # for visualization.
    dump_play = args.dump_play and dump

    if need_restart:
        env.restart()

    if args.is_path_task:
        optimal = env.unwrapped.dist
        relation = env.unwrapped.graph.get_edges()
        relation = np.stack([relation, relation.T], axis=-1)
        st, ed = env.current_state
        nodes_trajectory = [int(st)]
        destination = int(ed)
        policies = []
    elif args.is_sort_task:
        optimal = env.unwrapped.optimal
        array = [str(i) for i in env.unwrapped.array]

    while not is_over:
        if args.is_path_task:
            st, ed = env.current_state
            state = np.zeros((relation.shape[0], 2))
            state[st, 0] = 1
            state[ed, 1] = 1
            feed_dict = dict(states=np.array([state]),
                             relations=np.array([relation]))
        elif args.is_sort_task:
            state = env.current_state
            feed_dict = dict(states=np.array([state]))
        feed_dict['entropy_beta'] = as_tensor(entropy_beta).float()
        feed_dict = as_tensor(feed_dict)
        if args.use_gpu:
            feed_dict = as_cuda(feed_dict)

        with torch.set_grad_enabled(not eval_only):
            output_dict = model(feed_dict)

        policy = output_dict['policy']
        p = as_numpy(policy.data[0])
        action = p.argmax() if use_argmax else random.choice(len(p), p=p)
        reward, is_over = env.action(action)

        # collect moves information
        if dump_play:
            if args.is_path_task:
                moves.append(int(action))
                nodes_trajectory.append(int(env.current_state[0]))
                logits = as_numpy(output_dict['logits'].data[0])
                tops = np.argsort(p)[-10:][::-1]
                tops = list(
                    map(lambda x: (int(x), float(p[x]), float(logits[x])),
                        tops))
                policies.append(tops)
            if args.is_sort_task:
                # Need to ensure that env.utils.MapActionProxy is the outermost class.
                mapped_x, mapped_y = env.mapping[action]
                moves.append([mapped_x, mapped_y])

        # For now, assume reward=1 only when succeed, otherwise reward=0.
        # Manipulate the reward and get success information according to reward.
        if reward == 0 and args.penalty is not None:
            reward = args.penalty
        succ = 1 if is_over and reward > 0.99 else 0

        score += reward
        traj['states'].append(state)
        if args.is_path_task:
            traj['relations'].append(relation)
        traj['rewards'].append(reward)
        traj['actions'].append(action)

    # Dump a json file recording the episode for visualization.
    if dump_play and not (args.dump_fail_only and succ):
        if args.is_path_task:
            num = env.unwrapped.nr_nodes
            graph = relation[:, :, 0].tolist()
            coordinates = env.unwrapped.graph.get_coordinates().tolist()
            json_str = json.dumps(
                dict(graph=graph,
                     coordinates=coordinates,
                     policies=policies,
                     destination=destination,
                     current=nodes_trajectory,
                     moves=moves))
        if args.is_sort_task:
            num = env.unwrapped.nr_numbers
            json_str = json.dumps(dict(array=array, moves=moves))
        dump_file = os.path.join(args.current_dump_dir,
                                 '{}_size{}.json'.format(play_name, num))
        with open(dump_file, 'w') as f:
            f.write(json_str)

    length = len(traj['rewards'])
    return succ, score, traj, length, optimal
Code example #20
File: thutils.py Project: matthieu637/DLM
def rms(p):
    """Root mean square function."""
    return as_float((as_tensor(p)**2).mean()**0.5)
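
For comparison, the same quantity with plain torch (a one-line sketch without the Jacinle as_tensor/as_float wrappers):

import torch

p = torch.tensor([3.0, 4.0])
rms_value = (p ** 2).mean().sqrt().item()   # sqrt((9 + 16) / 2) ≈ 3.536
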
Code example #21
def run_episode(env,
                model,
                number,
                play_name='',
                dump=False,
                dataset=None,
                eval_only=False,
                use_argmax=False,
                need_restart=False,
                entropy_beta=0.0):
    """Run one episode using the model with $number blocks."""
    is_over = False
    traj = collections.defaultdict(list)
    score = 0
    if need_restart:
        env.restart()
    nr_objects = number + 1
    # If dump_play=True, store the states and actions in a json file
    # for visualization.
    dump_play = args.dump_play and dump
    if dump_play:
        array = env.unwrapped.current_state
        moves, new_pos, policies = [], [], []

    while not is_over:
        state = env.current_state
        feed_dict = dict(states=np.array([state]))
        feed_dict['entropy_beta'] = as_tensor(entropy_beta).float()
        feed_dict = as_tensor(feed_dict)
        if args.use_gpu:
            feed_dict = as_cuda(feed_dict)

        with torch.set_grad_enabled(not eval_only):
            output_dict = model(feed_dict)
        policy = output_dict['policy']
        p = as_numpy(policy.data[0])
        action = p.argmax() if use_argmax else random.choice(len(p), p=p)
        # Need to ensure that the env.utils.MapActionProxy is the outermost class.
        mapped_x, mapped_y = env.mapping[action]
        # env.unwrapped to get the innermost Env class.
        valid = env.unwrapped.world.moveable(mapped_x, mapped_y)
        reward, is_over = env.action(action)
        if dump_play:
            moves.append([mapped_x, mapped_y])
            res = tuple(env.current_state[mapped_x][2:])
            new_pos.append((int(res[0]), int(res[1])))

            logits = as_numpy(output_dict['logits'].data[0])
            tops = np.argsort(p)[-10:][::-1]
            tops = list(
                map(lambda x: (env.mapping[x], float(p[x]), float(logits[x])),
                    tops))
            policies.append(tops)

        # For now, assume reward=1 only when succeed, otherwise reward=0.
        # Manipulate the reward and get success information according to reward.
        if reward == 0 and args.penalty is not None:
            reward = args.penalty
        succ = 1 if is_over and reward > 0.99 else 0

        score += reward
        traj['states'].append(state)
        traj['rewards'].append(reward)
        traj['actions'].append(action)
        if not eval_only and dataset is not None and mapped_x != mapped_y:
            dataset.append(nr_objects, state, action, valid)

    # Dump a json file as a record of the episode.
    if dump_play and not (args.dump_fail_only and succ):
        array = array[:, 2:].astype('int32').tolist()
        array = [array[:nr_objects], array[nr_objects:]]
        json_str = json.dumps(
            # Let indent=True for an indented view of json files.
            dict(array=array, moves=moves, new_pos=new_pos, policies=policies))
        dump_file = os.path.join(
            args.current_dump_dir,
            '{}_blocks{}.json'.format(play_name, env.unwrapped.nr_blocks))
        with open(dump_file, 'w') as f:
            f.write(json_str)

    length = len(traj['rewards'])
    return succ, score, traj, length
Code example #22
 def __getitem__(self, index):
     if self._value is None:
         self._value = random.rand()
     time.sleep(0.1)
     return as_tensor(np.array([self._value]))
Code example #23
 def forward(self, *args, cast_tensor=False, **kwargs):
     if cast_tensor:
         args = as_tensor(args)
         kwargs = as_tensor(kwargs)
     outputs = self._model(*args, **kwargs)
     return outputs
Code example #24
File: train.py Project: dair-iitd/1oML
    def step(self,
             feed_dict,
             reduce_func=default_reduce_func,
             cast_tensor=False):
        assert self._model.training, 'Step an evaluation-mode model.'
        self.num_iters += 1
        self.trigger_event('step:before', self)
        loss_latent = 0.0
        if cast_tensor:
            feed_dict = as_tensor(feed_dict)

        begin = time.time()

        self.trigger_event('forward:before', self, feed_dict)

        rl_loss = 0.0
        if self.mode == 'warmup':
            loss, monitors, output_dict = self._model(feed_dict)
        else:
            if args.no_static:
                loss, monitors, output_dict = self._model(
                    feed_dict, return_loss_matrix=True)
                y_hat = output_dict['pred'].detach()
            else:
                with torch.no_grad():
                    #y_hat = self._static_model(feed_dict)['pred'].detach()
                    static_model_output = self._static_model(
                        feed_dict, return_loss_matrix=True)
                    if isinstance(static_model_output, dict):
                        y_hat = static_model_output['pred'].detach()
                        output_dict = static_model_output
                    else:
                        y_hat = static_model_output[2]['pred'].detach()
                        output_dict = static_model_output[2]

            keys = [
                'mask', 'n', 'query', 'count', 'is_ambiguous', 'qid',
                'target_set', 'relations', 'gtlt'
            ]

            expanded_feed_dict = {}
            for key in keys:
                if key in feed_dict:
                    expanded_feed_dict[key] = expand_tensor(
                        feed_dict[key], feed_dict["count"])
            #
            #unravel target set to obtain different targets
            expanded_feed_dict["target"] = unravel_tensor(
                feed_dict["target_set"], feed_dict["count"])
            # copy the intermediate y for each target
            y_hat = expand_tensor(y_hat, feed_dict["count"])

            # inserting detached loss in the expanded_feed_dict for deterministic latent model
            #Pdb().set_trace()
            if 'loss_matrix' in output_dict:
                expanded_feed_dict['loss'] = unravel_tensor(
                    output_dict['loss_matrix'], feed_dict['count']).detach()
                if args.latent_model == 'eg':
                    expanded_feed_dict[
                        'minloss_eg_prob'] = unravel_minloss_epsilon_greedy(
                            output_dict['loss_matrix'], feed_dict['count'],
                            args.minloss_eg_eps).detach()
            # compute latent variable, i.e. the scores for each of the possible targets
            z_latent = self._latent_model(expanded_feed_dict, y_hat,
                                          output_dict)['latent_z']

            # start index and end index are markers for start and end indices
            # of each query in the expanded feed dict
            start_index = torch.cumsum(feed_dict["count"],
                                       0) - feed_dict["count"]
            end_index = torch.cumsum(feed_dict["count"], 0)

            min_indices = []
            action_prob = []
            #rl_weights = []
            weights = []

            # loop over each query
            for s, e in zip(start_index, end_index):
                dis2 = z_latent[s:e].squeeze(1)
                probs = get_prob_from_dis(dis2)
                weights.append(
                    F.pad(probs,
                          (0, feed_dict['target_set'].size(1) - probs.size(0)),
                          "constant", 0))
            #
            selected_feed_dict = feed_dict
            if args.rl_exploration:
                selected_feed_dict["weights"] = rl_sampling(
                    torch.stack(weights).detach().clone())
            else:
                selected_feed_dict["weights"] = torch.stack(
                    weights).detach().clone()

            loss = 0
            if not args.no_static:
                # Pdb().set_trace()
                loss, monitors, output_dict = self._model(selected_feed_dict)
            else:
                loss = (output_dict['loss_matrix'] *
                        selected_feed_dict['weights']
                        ).sum() / selected_feed_dict['weights'].sum()

            if (feed_dict['is_ambiguous'].sum() > 0):
                if not args.rl_exploration:
                    avg_reward = (
                        (output_dict['reward'] *
                         (feed_dict['mask'].float())).sum(dim=1) /
                        (feed_dict['mask'].sum(dim=1).float())).unsqueeze(-1)
                    #avg_reward = (output_dict['reward']*(feed_dict['mask'].float())).sum()/(feed_dict['mask'].sum().float())
                    rewards = (output_dict['reward'] -
                               avg_reward) * (feed_dict['mask'].float())
                    rl_loss = -1.0 * (rewards * torch.stack(weights)).sum(
                    ) / feed_dict['is_ambiguous'].sum()
                else:
                    # use selected_feed_dict['weights']; rewards apply only to the non-zero samples.
                    # Also, now we use REINFORCE: maximize reward * log(p_action)
                    rl_loss = -1.0 * (
                        (output_dict['reward'] + 0.5) *
                        selected_feed_dict['weights'] * torch.log(
                            torch.stack(weights) + 1.0 -
                            selected_feed_dict['weights'])
                    ).sum() / feed_dict['is_ambiguous'].sum().float()
            loss_latent = rl_loss

        self.trigger_event('forward:after', self, feed_dict, loss, monitors,
                           output_dict)

        loss = reduce_func('loss', loss)
        loss_f = as_float(loss)

        monitors = {k: reduce_func(k, v) for k, v in monitors.items()}
        if self.mode == 'hot':
            monitors['loss_latent'] = loss_latent
        monitors_f = as_float(monitors)

        self._optimizer.zero_grad()
        if self.mode in ['hot']:
            if torch.is_tensor(loss_latent):
                loss_latent = reduce_func('loss_latent', loss_latent)
            #
            self._latent_optimizer.zero_grad()

        self.trigger_event('backward:before', self, feed_dict, loss, monitors,
                           output_dict)

        if loss.requires_grad:
            loss.backward()

        if self.mode in ['hot']:
            if torch.is_tensor(loss_latent):
                loss_latent.backward()
                # print("Grad:",self._latent_model.digit_embed.weight.grad[2,:2],self._latent_model.atn_across_steps.grad)
                # Pdb().set_trace()
                #print('Latent: ',self.digit_embed.weight.data[2,:4], self.row_embed.weight.data[2,:4])
                #print('Atn over steps: ',self.atn_across_steps)

        self.trigger_event('backward:after', self, feed_dict, loss, monitors,
                           output_dict)

        loss_latent_f = loss_latent.item() if torch.is_tensor(
            loss_latent) else loss_latent
        grad_norm_before_clip, grad_norm_after_clip, param_norm_before_clip, lgrad_norm_before_clip, lgrad_norm_after_clip, lparam_norm_before_clip = 0, 0, 0, -1, -1, 0

        if loss.requires_grad:
            grad_norm_before_clip, grad_norm_after_clip, param_norm_before_clip = utils.gradient_normalization(
                self._model, grad_norm=args.grad_clip)
            #glogger.info(','.join(map(lambda x: str(round(x,6)),[self.current_epoch, self.num_iters, loss_f, loss_latent_f, grad_norm_before_clip.item(), grad_norm_after_clip.item(), param_norm_before_clip.item()])))
            if grad_norm_before_clip <= args.upper_limit_on_grad_norm:
                self._optimizer.step()
            else:
                self.num_bad_updates += 1
                logger.info(
                    'not taking optim step. Grad too high {}. Num bad updates: {}'
                    .format(round(grad_norm_before_clip, 2),
                            self.num_bad_updates))

            #self._optimizer.step()

        if self.mode in ['hot']:
            lgrad_norm_before_clip, lgrad_norm_after_clip, lparam_norm_before_clip = utils.gradient_normalization(
                self._latent_model, grad_norm=args.grad_clip)
            self._latent_optimizer.step()

        glogger.info(','.join(
            map(lambda x: str(round(x, 6)), [
                self.current_epoch, self.num_iters, loss_f, loss_latent_f,
                grad_norm_before_clip, grad_norm_after_clip,
                param_norm_before_clip, lgrad_norm_before_clip,
                lgrad_norm_after_clip, lparam_norm_before_clip
            ])))
        end = time.time()

        self.trigger_event('step:after', self)

        return loss_f, monitors_f, output_dict, {'time/gpu': end - begin}
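
The exploration branch above uses a REINFORCE-style objective (maximize reward-weighted log action probabilities). A minimal standalone sketch of that loss, with hypothetical action probabilities and rewards:

import torch

# Hypothetical per-candidate action probabilities (from a policy) and rewards.
probs = torch.tensor([0.2, 0.7, 0.1], requires_grad=True)
rewards = torch.tensor([0.0, 1.0, 0.0])

# REINFORCE: minimize -E[reward * log p(action)].
rl_loss = -(rewards * torch.log(probs)).sum()
rl_loss.backward()
print(probs.grad)   # only the rewarded action gets a negative gradient,
                    # so gradient descent raises its probability
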
Code example #25
def _instance_accuracy(label,
                       raw_pred,
                       compare_func,
                       return_float=True,
                       feed_dict=None,
                       args=None):
    """get instance-wise accuracy for structured prediction task instead of pointwise task"""
    # disctretize output predictions
    if not args.task_is_sudoku:
        pred = as_tensor(raw_pred)
        pred = (pred > 0.5).float()
    else:
        step_pred = as_tensor(raw_pred.argmax(dim=1)).float()
        pred = step_pred[:, :, -1]

        # step_pred is batch_size x 81 x num_steps
        # transpose for more efficient reward calculation
        # new shape is batch_size x num_steps x 81
        step_pred = step_pred.transpose(1, 2)

    label = as_tensor(label).type(pred.dtype)

    diff = (label == pred)
    point_acc = torch.sum(diff).float() / label.numel()
    incorrect = torch.min(diff, dim=1)[0]
    in_acc = torch.sum(incorrect).float() / len(label)

    errors = []
    corrected_acc = 0
    reward = []
    new_targets = []
    acc_vector = []
    for i, x in enumerate(pred):
        if compare_func(x, feed_dict['query'][i].type(x.dtype)):
            corrected_acc += 1
            acc_vector.append(1)
            # check if pred matches any target
            if ((feed_dict['target_set'][i].type(x.dtype) == x).sum(dim=1)
                    == x.shape[0]).sum() > 0:
                new_targets.append((None, None))
            else:
                new_targets.append((x, 0))
        else:
            acc_vector.append(0)
            errors.append(feed_dict["count"][i].item())
            new_targets.append((None, None))
        if args.task_is_sudoku:
            #if args.use_gpu:
            #    diff = torch.zeros(len(feed_dict['target_set'][i]),step_pred.shape[1], device=torch.device("cuda"))
            #else:
            #    diff = torch.zeros(len(feed_dict['target_set'][i]),step_pred.shape[1]).cuda()
            #for target_idx,target in enumerate(feed_dict['target_set'][i,:feed_dict['count'][i]].float()):
            #    diff[target_idx] = torch.sum(~(step_pred[i]==target), dim=1).float()
            #for target_idx in range(feed_dict['count'][i],diff.shape[0]):
            #    diff[target_idx] = diff[target_idx-1]
            #
            #alternative tensor way
            NS, NN, TS = step_pred.size(1), step_pred.size(
                2), feed_dict['target_set'].size(1)
            diff = (step_pred[i].unsqueeze(-1).expand(NS, NN, TS).transpose(
                0, 2).float() !=
                    feed_dict['target_set'][i].unsqueeze(-1).expand(
                        TS, NN, NS).float()).sum(dim=1).float()

            if args.rl_reward == 'count':
                reward.append(diff.mean(dim=1))
            else:
                reward.append(torch.clamp_max(diff, 1).mean(dim=1))
        else:
            diff = torch.sum(~(feed_dict["target_set"][i].type(x.dtype) == x),
                             dim=1).float()
            if args.rl_reward == 'count':
                reward.append(diff)
            else:
                reward.append(torch.clamp_max(diff, 1))
    corrected_acc /= len(pred)

    reward = -torch.stack(reward)
    target_set_accuracy = (reward.max(dim=1)[0] >= 0).float().mean()

    if return_float:
        return {
            "accuracy": in_acc.item(),
            "corrected accuracy": corrected_acc,
            "pointwise accuracy": point_acc.item(),
            "target set accuracy": target_set_accuracy.item()
        }, errors, reward  # , acc_vector
    return {
        "accuracy": torch.tensor(in_acc),
        "corrected accuracy": torch.tensor(corrected_acc),
        "pointwise accuracy": point_acc,
        "target set accuracy": target_set_accuracy
    }, errors, reward, new_targets
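
The broadcasted mismatch count above yields a (num targets x num steps) matrix of differing cells. A sketch on toy shapes, with a hypothetical 4-cell board, 2 solution steps, and 2 candidate targets standing in for the 81-cell Sudoku grid:

import torch

NS, NN, TS = 2, 4, 2   # steps, cells, candidate targets (toy sizes)
step_pred = torch.tensor([[1., 2., 3., 4.],
                          [0., 2., 3., 5.]])   # (NS, NN): prediction after each step
target_set = torch.tensor([[1., 2., 3., 4.],
                           [1., 2., 9., 9.]])  # (TS, NN): candidate targets

# Same broadcast as above: count, per (target, step), the cells that differ.
diff = (step_pred.unsqueeze(-1).expand(NS, NN, TS).transpose(0, 2).float()
        != target_set.unsqueeze(-1).expand(TS, NN, NS).float()).sum(dim=1).float()
print(diff)   # tensor([[0., 2.],
              #         [2., 3.]])
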
Code example #26
    def _get_result_given_player(self, index, meters, number, player, mode):
        assert mode in ['train', 'test', 'mining', 'mining-deter', 'mining-stoch', 'inherit', 'test-inter', 'test-inter-deter', 'test-deter']
        params = dict(
            eval_only=True,
            number=number,
            play_name='{}_epoch{}_episode{}'.format(mode, self.current_epoch, index))
        backup = None
        if mode == 'train':
            params['eval_only'] = False
            params['dataset'] = self.valid_action_dataset
            params['entropy_beta'] = self.entropy_beta
            meters.update(lr=self.lr, entropy_beta=self.entropy_beta)
        elif 'test' in mode:
            params['dump'] = True
            params['use_argmax'] = 'deter' in mode
        else:
            backup = copy.deepcopy(player)
            params['use_argmax'] = index < (args.mining_epoch_size//2)

        if mode == 'train':
            if args.use_gpu:
                self.model.cpu()

            mergedfc = []
            for i in range(args.ntrajectory):
                succ, score, traj, length, optimal = run_episode(player, self.model, mode, need_restart=(i!=0), **params)
                if args.task in ['sort', 'path']:
                    meters.update(number=number, succ=succ, score=score, length=length, optimal=optimal)
                else:
                    meters.update(number=number, succ=succ, score=score, length=length)
                feed_dict = make_data(traj, args.gamma)
                # content from valid_move dataset
                if args.pred_weight != 0.0:
                    states, actions, labels = self.valid_action_dataset.sample_batch(args.batch_size)
                    feed_dict['pred_states'] = as_tensor(states)
                    feed_dict['pred_actions'] = as_tensor(actions)
                    feed_dict['valid'] = as_tensor(labels).float()
                mergedfc.append(feed_dict)

            for k in feed_dict.keys():
                if k not in ["rewards", "entropy_beta"]:  # reward not used to update loss
                    if type(mergedfc[0][k]) is list:
                        f1 = [j[k][0] for j in mergedfc]
                        f2 = [j[k][1] for j in mergedfc]
                        feed_dict[k] = [torch.cat(f1, dim=0), torch.cat(f2, dim=0)]
                    else:
                        feed_dict[k] = torch.cat([j[k] for j in mergedfc], dim=0)
            feed_dict['entropy_beta'] = as_tensor(self.entropy_beta).float()
            feed_dict['training'] = as_tensor(True)

            if args.norm_rewards:
                if args.accum_grad > 1:
                    feed_dict['discount_rewards'] = self.model.rnorm.obs_filter(feed_dict['discount_rewards'])
                elif feed_dict['discount_rewards'].shape[0] > 1:
                    feed_dict['discount_rewards'] = (feed_dict['discount_rewards'] - feed_dict['discount_rewards'].mean()) / (feed_dict['discount_rewards'].std() + 10 ** -7)

            # dirty trick: tell the gradient-accumulating optimizer the actual batch size
            if args.accum_grad > 1:
                self.optimizer.provide_batch_size(feed_dict['discount_rewards'].shape[0])

            if args.use_gpu:
                feed_dict = as_cuda(feed_dict)
                self.model.cuda()
            self.model.train()
            return feed_dict
        else:
            if args.use_gpu:
                self.model.cpu()
            succ, score, traj, length, optimal = run_episode(player, self.model, mode, **params)
            if args.task in ['sort', 'path']:
                meters.update(number=number, succ=succ, score=score, length=length, optimal=optimal)
                message = ('> {} iter={iter}, number={number}, succ={succ}, '
                       'score={score:.4f}, length={length}, optimal={optimal}').format(mode, iter=index, **meters.val)
            else:
                meters.update(number=number, succ=succ, score=score, length=length)
                message = ('> {} iter={iter}, number={number}, succ={succ}, '
                       'score={score:.4f}, length={length}').format(mode, iter=index, **meters.val)
            return message, dict(succ=succ, number=number, backup=backup)
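
The args.norm_rewards branch above standardizes the discounted returns to roughly zero mean and unit variance. A tiny check with a hypothetical returns tensor:

import torch

discount_rewards = torch.tensor([1.0, 0.9, 0.81, 0.0])
normed = (discount_rewards - discount_rewards.mean()) / (discount_rewards.std() + 1e-7)
print(normed.mean().item(), normed.std().item())   # ≈ 0.0 and ≈ 1.0
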
Code example #27
def run_episode(env,
                model,
                mode,
                number,
                play_name='',
                dump=False,
                dataset=None,
                eval_only=False,
                use_argmax=False,
                need_restart=False,
                entropy_beta=0.0):
    """Run one episode using the model with $number blocks."""
    is_over = False
    traj = collections.defaultdict(list)
    score = 0
    if need_restart:
        env.restart()

    optimal = None
    if args.task == 'path':
        optimal = env.unwrapped.dist
        relation = env.unwrapped.graph.get_edges()
        relation = np.stack([relation, relation.T], axis=-1).astype(dtype=np.float32)
        st, ed = env.current_state
        nodes_trajectory = [int(st)]
        destination = int(ed)
        policies = []
    elif args.task == 'sort':
        optimal = env.unwrapped.optimal
        array = [str(i) for i in env.unwrapped.array]

    # If dump_play=True, store the states and actions in a json file
    # for visualization.
    dump_play = args.dump_play and dump
    if dump_play:
        nr_objects = number + 1
        array = env.unwrapped.current_state
        moves, new_pos, policies = [], [], []

    if args.model == 'dlm':
        # By default the network isn't in training mode during data collection,
        # but with dlm we don't want argmax-only behaviour, except in 2 cases
        # (testing interpretability, or the last mining phase, to get an interpretable policy);
        # parentheses group mining/inherit with the graduate-size check to match those two cases:
        if ('inter' in mode) or ((('mining' in mode) or ('inherit' in mode)) and number == args.curriculum_graduate):
            model.lowernoise()
        else:
            model.train(True)

            if args.dlm_noise == 1 and (('mining' in mode) or ('inherit' in mode) or ('test' in mode)):
                model.lowernoise()
            elif args.dlm_noise == 2:
                model.lowernoise()

    step = 0
    while not is_over:
        if args.task == 'path':
            st, ed = env.current_state
            state = np.zeros((relation.shape[0], 2), dtype=np.float32)
            state[st, 0] = 1
            state[ed, 1] = 1
            feed_dict = dict(states=[np.array([state]), np.array([relation])])
        else:
            state = env.current_state
            if 'nlrl' not in args.task or args.task == 'sort':
                feed_dict = dict(states=np.array([state]))
            else:
                feed_dict = dict(states=state)
        feed_dict['entropy_beta'] = as_tensor(entropy_beta).float()
        feed_dict['training'] = as_tensor(False)
        feed_dict = as_tensor(feed_dict)

        with torch.set_grad_enabled(False):
            output_dict = model(feed_dict)
        policy = output_dict['policy']
        p = as_numpy(policy.data[0])
        action = p.argmax() if use_argmax else random.choice(len(p), p=p)
        if args.pred_weight != 0.0:
            # Need to ensure that the env.utils.MapActionProxy is the outermost class.
            mapped_x, mapped_y = env.mapping[action]
            # env.unwrapped to get the innermost Env class.
            valid = env.unwrapped.world.moveable(mapped_x, mapped_y)
        reward, is_over = env.action(action)
        step += 1
        if dump_play:
            moves.append([mapped_x, mapped_y])
            res = tuple(env.current_state[mapped_x][2:])
            new_pos.append((int(res[0]), int(res[1])))

            logits = as_numpy(output_dict['logits'].data[0])
            tops = np.argsort(p)[-10:][::-1]
            tops = list(
                map(lambda x: (env.mapping[x], float(p[x]), float(logits[x])), tops))
            policies.append(tops)
        # For now, assume reward=1 only when succeed, otherwise reward=0.
        # Manipulate the reward and get success information according to reward.
        if reward == 0 and args.penalty is not None:
            reward = args.penalty
        succ = 1 if is_over and reward > 0.99 else 0

        score += reward

        if type(feed_dict['states']) is list:
            traj['states'].append([f for f in feed_dict['states']])
        else:
            traj['states'].append(state)

        traj['rewards'].append(reward)
        traj['actions'].append(action)

        if args.pred_weight != 0.0:
            if not eval_only and dataset is not None and mapped_x != mapped_y:
                dataset.append(nr_objects, state, action, valid)

    # Dump a json file as a record of the episode.
    if dump_play and not (args.dump_fail_only and succ):
        array = array[:, 2:].astype('int32').tolist()
        array = [array[:nr_objects], array[nr_objects:]]
        json_str = json.dumps(
            # Let indent=True for an indented view of json files.
            dict(array=array, moves=moves, new_pos=new_pos,
                 policies=policies))
        dump_file = os.path.join(
            args.current_dump_dir,
            '{}_blocks{}.json'.format(play_name, env.unwrapped.nr_blocks))
        with open(dump_file, 'w') as f:
            f.write(json_str)

    length = step

    if args.model == 'dlm':
        model.restorenoise()

    return succ, score, traj, length, optimal