def read_real_data(self, sample_shape, batch_feed):
    """Build the (state, action) pair for a real batch from its state table.

    Args:
        sample_shape: Not used by this method; kept as part of the signature.
        batch_feed: Mapping that must provide ``'action_id'`` and
            ``'state_table'`` entries.

    Returns:
        A ``(real_state_rep, action_data_feed)`` tuple. The state is the
        output of ``one_hot_embedding`` over the flattened state table,
        viewed as ``(-1, self.state_out_size)``; the action is the output of
        ``one_hot_embedding`` over ``batch_feed['action_id']``.
    """
    cfg = self.config
    action_onehot = one_hot_embedding(batch_feed['action_id'], cfg.action_num)

    # The table is loaded as LONG, presumably because the entries are bucket
    # indices consumed by one_hot_embedding -- TODO confirm against the loader.
    bucket_ids = self.np2var(batch_feed['state_table'], LONG)
    flat_ids = bucket_ids.view(-1)
    state_onehot = one_hot_embedding(flat_ids, cfg.bucket_num)

    # Regroup the per-entry encodings into rows of width state_out_size.
    return state_onehot.view(-1, self.state_out_size), action_onehot
def policy_validate_for_human(self, sample_shape, batch_feed):
    """Score a real batch with ``self.policy_validate``.

    The state comes from ``self.context_encoder(batch_feed)`` and is detached
    before use; the returned value is detached as well, so no gradient flows
    through either.

    Args:
        sample_shape: Not used by this method; kept as part of the signature.
        batch_feed: Mapping that must provide an ``'action_id'`` entry and
            whatever ``self.context_encoder`` reads.

    Returns:
        The detached result of ``self.policy_validate`` on the encoded state
        and the one-hot action.
    """
    action_data_feed = one_hot_embedding(batch_feed['action_id'],
                                         self.config.action_num)
    real_state_rep = self.context_encoder(batch_feed).detach()
    policy_prob = self.policy_validate(real_state_rep, action_data_feed)
    return policy_prob.detach()
def read_real_data(self, sample_shape, batch_feed):
    """Build the (state, action) pair for a real batch.

    The state source is selected by ``self.config.state_type``:

    * ``'rnn'``: ``self.context_encoder(batch_feed)``, detached.
    * ``'table'``: ``batch_feed['state_table']`` converted with
      ``self.np2var(..., FLOAT)``.

    Args:
        sample_shape: Not used by this method; kept as part of the signature.
        batch_feed: Mapping that must provide an ``'action_id'`` entry, plus
            ``'state_table'`` when the state type is ``'table'``.

    Returns:
        A ``(real_state_rep, action_data_feed)`` tuple.

    Raises:
        ValueError: If ``self.config.state_type`` is neither ``'rnn'`` nor
            ``'table'``.
    """
    state_type = self.config.state_type
    # Fail fast with a clear message: without this check an unsupported
    # value would leave the state unbound and surface as an
    # UnboundLocalError at the return statement.
    if state_type not in ('rnn', 'table'):
        raise ValueError(
            "Unsupported state_type {!r}; expected 'rnn' or 'table'".format(
                state_type))

    action_data_feed = one_hot_embedding(batch_feed['action_id'],
                                         self.config.action_num)
    if state_type == 'rnn':
        real_state_rep = self.context_encoder(batch_feed).detach()
    else:
        real_state_rep = self.np2var(batch_feed['state_table'], FLOAT)
    return real_state_rep, action_data_feed
def read_real_data_onehot_300(self, sample_shape, batch_feed):
    """Build the (state, action-representation) pair for a real batch.

    Args:
        sample_shape: Not used by this method; kept as part of the signature.
        batch_feed: Mapping that must provide ``'action_id'``,
            ``'action_rep_seg'`` and ``'state_convlab'`` entries.

    Returns:
        A ``(real_state_rep, action_rep_seg)`` tuple: the state viewed as
        ``(-1, self.state_out_size)`` and the action representation viewed
        as ``(-1, 100)``, both converted with ``self.np2var(..., FLOAT)``.
    """
    # NOTE(review): this one-hot encoding (hard-coded size 300) is computed
    # but never used or returned. Confirm whether it was meant to be the
    # returned action, or can be removed.
    one_hot_embedding(batch_feed['action_id'], 300)

    raw_action_rep = self.np2var(batch_feed['action_rep_seg'], FLOAT)
    action_rep_seg = raw_action_rep.view(-1, 100)

    raw_state = self.np2var(batch_feed['state_convlab'], FLOAT)
    return raw_state.view(-1, self.state_out_size), action_rep_seg