def _build_model(cls, config):
    """Build a language model and its optimizer from config."""
    file_path = join(data.workspace.word_vectors, config.model.wvec_path)
    word_embeddings = SimpleEmbeddings.from_file(
        file_path, config.model.word_dim, vocab_size=config.model.vocab_size)
    word_embeddings = word_embeddings.with_special_tokens()
    token_embedder = TokenEmbedder(word_embeddings)

    model = None
    if config.model.type == 0:  # regular language model
        model = LanguageModel(token_embedder, config.model.hidden_dim,
                              config.model.agenda_dim, config.model.num_layers,
                              cls._make_logger())
    elif config.model.type == 1:  # SVAE
        model = NoisyLanguageModel(
            token_embedder, config.model.hidden_dim, config.model.agenda_dim,
            config.model.num_layers, config.model.kl_weight_steps,
            config.model.kl_weight_rate, config.model.kl_weight_cap,
            config.model.dci_keep_rate, cls._make_logger())
    assert model is not None

    model = try_gpu(model)
    optimizer = optim.Adam(model.parameters(), lr=config.optim.learning_rate)
    return model, optimizer
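
# `try_gpu` is used by every builder in this file but defined elsewhere. A
# minimal sketch of the usual pattern, under a hypothetical name so it does
# not shadow the real helper (an assumption, not this project's
# implementation):
import torch

def _try_gpu_sketch(x):
    """Move a module or tensor onto CUDA when available; else return it as-is."""
    return x.cuda() if torch.cuda.is_available() else x
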
def _build_editor(cls, config, num_iter, eps, momentum):
    """Build Editor.

    Args:
        config (Config): Editor config
        num_iter (int): forwarded to the Editor
        eps (float): forwarded to the Editor
        momentum (float): forwarded to the Editor

    Returns:
        Editor
    """
    file_path = join(data.workspace.word_vectors, config.wvec_path)
    word_embeddings = SimpleEmbeddings.from_file(file_path, config.word_dim,
                                                 vocab_size=config.vocab_size)
    word_embeddings = word_embeddings.with_special_tokens()
    source_token_embedder = TokenEmbedder(word_embeddings)
    target_token_embedder = TokenEmbedder(word_embeddings)

    if config.decoder_cell == 'SimpleDecoderCell':
        decoder_cell = SimpleDecoderCell(target_token_embedder,
                                         config.hidden_dim, config.word_dim,
                                         config.agenda_dim)
    elif config.decoder_cell == 'AttentionDecoderCell':
        decoder_cell = AttentionDecoderCell(target_token_embedder,
                                            config.agenda_dim,
                                            config.hidden_dim,
                                            config.hidden_dim,
                                            config.attention_dim,
                                            config.no_insert_delete_attn,
                                            num_layers=config.decoder_layers)
    else:
        raise ValueError('{} not implemented'.format(config.decoder_cell))

    editor = Editor(source_token_embedder, config.hidden_dim,
                    config.agenda_dim, config.edit_dim, config.lamb_reg,
                    config.norm_eps, config.norm_max, config.kill_edit,
                    decoder_cell, config.encoder_layers, num_iter, eps,
                    momentum)
    editor = try_gpu(editor)
    return editor
def _truncate_extension_probs(cls, extension_probs, beam_size):
    """For each example, keep only the k = beam_size highest scoring extension probs.

    Args:
        extension_probs (np.ndarray): of shape (batch_size, vocab_size)
        beam_size (int)

    Returns:
        extension_probs_sorted (np.ndarray): of shape (batch_size, beam_size).
            Like extension_probs, but each row is sorted in descending
            probability and truncated to a length of beam_size.
        original_indices (np.ndarray): of shape (batch_size, beam_size).
            original_indices[i, j] = the original column index of the
            probability value at extension_probs_sorted[i, j]
    """
    extension_probs_var = try_gpu(
        Variable(torch.from_numpy(extension_probs), volatile=True))
    extension_probs_sorted_var, original_indices_var = torch.sort(
        extension_probs_var, 1, descending=True)
    extension_probs_sorted_var = extension_probs_sorted_var[:, :beam_size]
    original_indices_var = original_indices_var[:, :beam_size]

    from_var = lambda v: v.data.cpu().numpy()
    extension_probs_sorted = from_var(extension_probs_sorted_var)
    original_indices = from_var(original_indices_var)

    # Equivalent pure-NumPy implementation, kept for reference:
    # original_indices = np.argsort(-extension_probs, axis=1)[:, :beam_size]
    # j_indices, i_indices = np.meshgrid(np.arange(beam_size), np.arange(batch_size))
    # extension_probs_sorted = extension_probs[i_indices, original_indices]

    return extension_probs_sorted, original_indices
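
# Note: the sort-then-slice above can be collapsed into torch.topk, which
# returns the k largest values and their original column indices in one call.
# Sketch (uses the modern tensor API rather than the Variable/volatile API
# that the surrounding code targets):
import torch

_probs = torch.rand(4, 100)                   # (batch_size, vocab_size)
_vals, _idx = torch.topk(_probs, k=5, dim=1)  # both of shape (batch_size, beam_size)
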
def _build_model(config, training_examples):
    # build scorer
    model_config = config.retriever
    embeds_path = join(data.workspace.word_vectors,
                       'glove.6B.{}d.txt'.format(model_config.word_dim))
    word_embeds = SimpleEmbeddings.from_file(embeds_path,
                                             model_config.word_dim,
                                             model_config.vocab_size)
    word_embeds = word_embeds.with_special_tokens()

    def seq_embedder(trainable):
        sent_dim = model_config.sent_dim
        token_embedder = TokenEmbedder(word_embeds, trainable)
        if trainable:
            # if the embeddings are trainable, also add a linear transform
            transform = Linear(token_embedder.embed_dim, sent_dim)
        else:
            transform = lambda x: x
        return BOWSequenceEmbedder(token_embedder, embed_dim=sent_dim,
                                   pool=model_config.pool_method,
                                   transform=transform)

    neg_sampler = UniformNegativeSampler(training_examples)
    input_embedder = seq_embedder(trainable=model_config.train_input)
    output_embedder = seq_embedder(trainable=model_config.train_output)
    scorer = Seq2SeqScorer(input_embedder, output_embedder, neg_sampler,
                           score_method=model_config.score_method,
                           loss_method=model_config.loss_method)
    scorer = try_gpu(scorer)

    # build optimizer
    optimizer = optim.Adam(scorer.parameters(), lr=config.optim.learning_rate)
    return scorer, optimizer
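
# UniformNegativeSampler is defined elsewhere; conceptually it draws random
# training examples to serve as negatives for the ranking loss. A hypothetical
# sketch of that contract (the class name, method name, and signature here are
# assumptions, not the project's actual API):
import random

class _UniformNegativeSamplerSketch(object):
    def __init__(self, examples):
        self._examples = list(examples)

    def sample(self, k):
        """Return k examples drawn uniformly without replacement."""
        return random.sample(self._examples, k)
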
def _create_model(self):
    config = self.config
    self.model = create_model(config)
    self.model = try_gpu(self.model)
    self.optimizer = optim.Adam(self.model.parameters(),
                                lr=self.config.train.learning_rate,
                                weight_decay=self.config.train.l2_reg)
    self.gradient_clip = config.train.gradient_clip
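
# How the pieces configured above typically combine in a training step: Adam
# with L2 regularization via weight_decay, plus gradient-norm clipping before
# each update. A generic, self-contained sketch, not this trainer's actual
# loop (clip_grad_norm_ is the modern spelling; older PyTorch versions used
# clip_grad_norm):
import torch
import torch.nn as nn
import torch.optim as optim

_m = nn.Linear(8, 1)
_opt = optim.Adam(_m.parameters(), lr=1e-3, weight_decay=1e-5)
_loss = _m(torch.randn(4, 8)).pow(2).mean()
_opt.zero_grad()
_loss.backward()
nn.utils.clip_grad_norm_(_m.parameters(), max_norm=1.0)
_opt.step()
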
def clone(self):
    config = self._config
    dqn = try_gpu(Policy.from_config(config.policy, self._num_actions))
    dqn._Q.load_state_dict(self._dqn._Q.state_dict())
    dqn._target_Q.load_state_dict(self._dqn._target_Q.state_dict())
    replay_buffer = ReplayBuffer(config.buffer_max_size)
    optimizer = optim.Adam(dqn.parameters(), lr=config.learning_rate)
    return Skill(dqn, replay_buffer, optimizer, self.name + "-clone",
                 config.sync_target_freq, config.min_buffer_size,
                 config.batch_size, config.grad_steps_per_update,
                 config.max_grad_norm, self._num_actions, config)
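
# Cloning via load_state_dict(state_dict()) copies the weights into a fresh
# module, so the clone trains independently of the original. Tiny
# self-contained check of that property:
import torch
import torch.nn as nn

_src = nn.Linear(4, 2)
_dst = nn.Linear(4, 2)
_dst.load_state_dict(_src.state_dict())  # copies values; no parameter sharing
assert torch.equal(_src.weight, _dst.weight)
assert _src.weight is not _dst.weight
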
def _build_model(cls, model_config, optim_config, data_config):
    """Build Editor.

    Args:
        model_config (Config): Editor config
        optim_config (Config): optimization config
        data_config (Config): dataset config

    Returns:
        Editor
    """
    file_path = join(data.workspace.word_vectors, model_config.wvec_path)
    word_embeddings = load_embeddings(file_path, model_config.word_dim,
                                      model_config.vocab_size,
                                      model_config.num_copy_tokens)
    word_dim = word_embeddings.embed_dim

    source_token_embedder = TokenEmbedder(word_embeddings,
                                          model_config.train_source_embeds)
    target_token_embedder = TokenEmbedder(word_embeddings,
                                          model_config.train_target_embeds)

    # number of input channels
    num_inputs = len(data_config.source_cols)

    decoder_cell = AttentionDecoderCell(
        target_token_embedder,
        2 * word_dim,  # 2 * word_dim because we concat base and copy vectors
        model_config.agenda_dim,
        model_config.hidden_dim,
        model_config.hidden_dim,
        model_config.attention_dim,
        num_layers=model_config.decoder_layers,
        num_inputs=num_inputs,
        dropout_prob=model_config.decoder_dropout_prob,
        disable_attention=False)
    encoder = Encoder(word_dim, model_config.agenda_dim,
                      model_config.hidden_dim, model_config.encoder_layers,
                      num_inputs, model_config.encoder_dropout_prob, False)

    copy_len = [5, 5, 40]  # hard-coded per-channel copy lengths
    model = Editor(source_token_embedder, encoder, decoder_cell,
                   copy_lens=copy_len)
    model = try_gpu(model)

    optimizer = optim.Adam(model.parameters(), lr=optim_config.learning_rate)
    return model, optimizer
def from_config(cls, config, num_actions, name):
    dqn = try_gpu(Policy.from_config(config.policy, num_actions))
    replay_buffer = ReplayBuffer.from_config(config.buffer)
    imitation_buffer = None
    if config.imitation:
        imitation_buffer = ReplayBuffer.from_config(config.buffer)
    optimizer = optim.Adam(dqn.parameters(), lr=config.learning_rate)
    return cls(dqn, replay_buffer, imitation_buffer, optimizer, name,
               config.sync_target_freq, config.min_buffer_size,
               config.batch_size, config.grad_steps_per_update,
               config.max_grad_norm, num_actions, config.adaptive_update,
               config.epsilon_clipping, config.max_worker_reward,
               config.dqn_vmax, config.dqn_vmin, config)
def crop(state):
    abstract_state = AS.AbstractState(state)
    # pixel coordinates are given on the original 210x160 Atari frame;
    # rescale them to the 84x84 preprocessed frame
    y = int(abstract_state.pixel_y * 84. / 210.)
    x = int(abstract_state.pixel_x * 84. / 160.)
    cropped = state.pixel_state[:, y - 10:y + 10, max(0, x - 30):x + 30]
    padding = (0, 0)
    if x - 30 < 0:
        padding = (30 - x, 0)
    elif x + 30 > 84:
        # the frame is 84 px wide after rescaling (the original comparison
        # against 160 was unreachable), so pad the right side by however many
        # columns the slice lost
        padding = (0, x + 30 - 84)
    cropped = torch.FloatTensor(cropped)
    cropped = try_gpu(
        torch.nn.functional.pad(cropped, padding, mode="reflect"))
    return cropped
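
# F.pad with a (left, right) tuple pads only the last dimension; with
# mode="reflect" it mirrors columns at the border, which suits image crops
# better than zero padding. Self-contained illustration:
import torch
import torch.nn.functional as F

_t = torch.arange(12, dtype=torch.float32).reshape(1, 2, 6)
_padded = F.pad(_t, (2, 0), mode="reflect")  # reflect 2 columns on the left
assert _padded.shape == (1, 2, 8)
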
def _build_editor(cls, model_config, data_config, word_embeddings, word_dim,
                  vae_mode):
    source_token_embedder = TokenEmbedder(word_embeddings,
                                          model_config.train_source_embeds)
    target_token_embedder = TokenEmbedder(word_embeddings,
                                          model_config.train_target_embeds)

    # number of input channels
    if vae_mode:
        num_inputs = len(data_config.source_cols)
    else:
        # the edit model uses num_inputs + num_inputs + 1 channels
        num_inputs = len(data_config.source_cols) * 2 + 1

    decoder_cell = AttentionDecoderCell(
        target_token_embedder,
        2 * word_dim,  # 2 * word_dim because we concat base and copy vectors
        model_config.agenda_dim,
        model_config.hidden_dim,
        model_config.hidden_dim,
        model_config.attention_dim,
        num_layers=model_config.decoder_layers,
        num_inputs=num_inputs,
        dropout_prob=model_config.decoder_dropout_prob,
        disable_attention=vae_mode)

    if vae_mode:
        encoder = Encoder(word_dim, model_config.agenda_dim,
                          model_config.hidden_dim, model_config.encoder_layers,
                          num_inputs, model_config.encoder_dropout_prob,
                          vae_mode, model_config.vae_kappa)
    else:
        encoder = Encoder(word_dim, model_config.agenda_dim,
                          model_config.hidden_dim, model_config.encoder_layers,
                          num_inputs, model_config.encoder_dropout_prob,
                          vae_mode)

    vae_copy_len = [5, 10, 185]
    editor_copy_len = [5, 10, 10, 5, 10, 10, 150]
    if vae_mode:
        model = Editor(source_token_embedder, encoder, decoder_cell,
                       vae_copy_len)
    else:
        model = Editor(source_token_embedder, encoder, decoder_cell,
                       editor_copy_len)
    model = try_gpu(model)
    return model
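
# Worked channel count: with three source columns (a hypothetical example),
# vae_mode gives num_inputs = 3, matching the 3 entries of vae_copy_len, while
# the edit model gets 3 + 3 + 1 = 7 channels, matching the 7 entries of
# editor_copy_len.
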
def __init__(self, config, save_dir):
    super(SystematicExplorationTrainingRun, self).__init__(config, save_dir)

    def make_env(i):
        def _thunk():
            # Only ever try to visualize index 0
            if i == 0:
                return OriginalPixelsWrapper(get_env(config.env))
            return get_env(config.env)
        return _thunk

    configure_abstract_state(config.env.domain)
    self._nproc = config.num_processes
    self._env = SubprocVecEnv([make_env(i) for i in range(self._nproc)])
    Traverse.configure(config.edge_expansion_coeff)
    self._policy = try_gpu(
        Master.from_config(config.policy, self._env.action_space.n,
                           self._env.reset()[0], self._nproc,
                           config.env.domain))
    self._max_episodes = config.max_episodes
    self._video_freq = config.video_freq
    self._text_freq = config.text_freq
    self._stats_freq = config.stats_freq
    self._eval_freq = config.eval_freq
    self._eval_video_freq = config.eval_video_freq
    self._checkpoint_freq = config.checkpoint_freq
    self._max_checkpoints = config.max_checkpoints
    self._permanent_checkpoint = config.get("permanent_checkpoint", None)
    self.workspace.add_dir("visualizations", "visualizations")
    self.workspace.add_dir("traces", "traces")
    self.workspace.add_dir("video", "video")
    self.workspace.add_file("log", "log")
    logging.basicConfig(filename=self.workspace.log)
    self._checkpoint_number = 0
    self._teleport_frames = 0
    self._true_frames = 0  # number of frames actually run, excluding teleports
    self._dead_frames = 0  # number of frames simulated on dead edges
    self._dead_episodes = 0  # number of episodes simulated on dead edges
    self._episode_nums = [0] * self._nproc  # episode number of each proc
    self._best_reward = 0.  # highest-reward episode seen so far for proc 0
    self._load_latest_checkpoint()
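
# The make_env(i)/_thunk nesting above sidesteps Python's late-binding closure
# pitfall: each _thunk captures its own i because i is bound as a function
# argument rather than a shared loop variable. Self-contained illustration:
def _make_thunks(n):
    def make(i):
        def thunk():
            return i
        return thunk
    return [make(i) for i in range(n)]

assert [f() for f in _make_thunks(3)] == [0, 1, 2]
# By contrast, [lambda: i for i in range(3)] would return 2 from every lambda.
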
def __init__(self, config, save_dir):
    super(DQNTrainingRun, self).__init__(config, save_dir)
    self._dqn = try_gpu(
        DQNPolicy.from_config(config.policy, self._env.action_space.n))
    optimizer = optim.Adam(self._dqn.parameters(), lr=config.learning_rate)
    self._train_state = self.checkpoints.load_latest(self._dqn, optimizer)
    self._replay_buffer = ReplayBuffer.from_config(config.buffer)

    # See configs/default-base.txt for documentation about these
    self._max_episode_len = config.max_episode_len
    self._buffer_size_start = config.buffer_size_start
    self._batch_size = config.batch_size
    self._sync_target_freq = config.sync_target_freq
    self._evaluate_freq = config.evaluate_freq
    self._episodes_to_evaluate = config.episodes_to_evaluate
    self._max_frames = config.max_frames
    self._update_freq = config.update_freq
    self._max_grad_norm = config.max_grad_norm
    self.workspace.add_dir("video", "video")
def backward_mom(self, grad_output, verbose=False):
    """Backpropagate grad_output through the unrolled momentum-SGD updates,
    using finite-difference Hessian-vector products."""
    input_kp1_grad = [g.data for g in grad_output]
    p_kp1_grad = [torch.zeros(x.size()).type_as(x) for x in input_kp1_grad]
    rev_iters = self.iters

    for k in reversed(range(rev_iters)):
        lr_k_list = list(self.lr)
        input_k_grad = input_kp1_grad
        p_kp1_grad = [
            p + lr_k * x
            for p, x, lr_k in zip(p_kp1_grad, input_kp1_grad, lr_k_list)
        ]

        # perturb the cached iterates by r * v for the finite-difference HVP
        input_k_rv = []
        input_H_xx_v = []
        r = self.eps
        for i in range(len(p_kp1_grad)):
            v = p_kp1_grad[i]
            x_k = self.input_cache[i][k]
            x_k_rv = Variable((x_k + r * v).type_as(x_k), requires_grad=True)
            input_k_rv.append(x_k_rv)

        # recompute the loss at the perturbed point, replaying the same
        # random choices as the forward pass
        torch.manual_seed(int(self.seeds[k]))
        mean, logvar = input_k_rv
        z_samples = self.encoder._reparameterize(mean, logvar, self.all_z[k])
        self.encoder_output = EncoderOutput(
            self.encoder_output.source_embeds,
            self.encoder_output.insert_embeds,
            self.encoder_output.delete_embeds, z_samples)
        loss = self.decoder.loss(self.encoder_output, self.y)

        if self.acc_param_grads:
            all_input_params = input_k_rv + self.params
        else:
            all_input_params = input_k_rv

        loss.backward(retain_variables=True)
        all_grads_rv_k = [i.grad for i in all_input_params]
        if self.max_grad_norm > 0:
            self.clip_grad_norm([g.data for g in all_grads_rv_k],
                                self.max_grad_norm)
        input_grads_rv_k = all_grads_rv_k[:len(input_k_rv)]
        param_grads_rv_k = all_grads_rv_k[len(input_k_rv):]

        if self.acc_param_grads:
            H_wx_v_list = []
            for i, p_grad_rv_k in enumerate(param_grads_rv_k):
                H_wx_v = (p_grad_rv_k.data
                          - try_gpu(self.param_grads[i][k])) / r
                H_wx_v_list.append(H_wx_v)
                if self.params[i].grad is None:
                    # try removing GPU below
                    self.params[i].grad = GPUVariable(
                        torch.zeros(self.params[i].size()).type_as(
                            self.params[i].data))
            if self.max_grad_norm > 0:
                self.clip_grad_norm(H_wx_v_list, self.max_grad_norm)
            for i in range(len(self.params)):
                self.params[i].grad.data += -H_wx_v_list[i]

        for i, x_k_rv_grad in enumerate(input_grads_rv_k):
            H_xx_v = (x_k_rv_grad.data - self.input_grads[i][k]) / r
            input_H_xx_v.append(H_xx_v)

        input_kp1_grad = [
            x_kp1_grad - H_xx_v
            for (x_kp1_grad, H_xx_v) in zip(input_kp1_grad, input_H_xx_v)
        ]
        if self.max_grad_norm > 0:
            self.clip_grad_norm(input_kp1_grad, self.max_grad_norm)
        p_kp1_grad = [p.mul_(self.momentum) for p in p_kp1_grad]
        if verbose:
            print('mom', k, input_kp1_grad[0][0].norm())
    return input_kp1_grad
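
# The (grad(x + r*v) - grad(x)) / r terms above are finite-difference
# approximations of Hessian-vector products. Tiny self-contained demo on
# f(x) = sum(x**2), whose Hessian is 2*I, so H v should come out as 2*v:
import torch

_x = torch.randn(3, requires_grad=True)
_v = torch.randn(3)
_r = 1e-4
_g1 = torch.autograd.grad((_x ** 2).sum(), _x)[0]
_g2 = torch.autograd.grad(((_x + _r * _v) ** 2).sum(), _x)[0]
_hvp = (_g2 - _g1) / _r  # equals 2 * _v, up to floating-point error
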
def _build_model(cls, model_config, optim_config, data_config):
    """Build Editor.

    Args:
        model_config (Config): Editor config
        optim_config (Config): optimization config
        data_config (Config): dataset config

    Returns:
        Editor
    """
    file_path = join(data.workspace.word_vectors, model_config.wvec_path)
    word_embeddings = load_embeddings(file_path, model_config.word_dim,
                                      model_config.vocab_size,
                                      model_config.num_copy_tokens)
    word_dim = word_embeddings.embed_dim

    edit_model = cls._build_editor(model_config, data_config, word_embeddings,
                                   word_dim, vae_mode=False)

    # VAE retriever
    vocab_dict = word_embeddings.vocab._word2index
    encoder = Encoder(word_dim, model_config.agenda_dim,
                      model_config.hidden_dim, model_config.encoder_layers,
                      len(data_config.source_cols),
                      model_config.encoder_dropout_prob, use_vae=True,
                      kappa=model_config.vae_kappa, use_target=False)
    source_token_embedder = TokenEmbedder(word_embeddings,
                                          model_config.train_source_embeds)
    target_token_embedder = TokenEmbedder(word_embeddings,
                                          model_config.train_target_embeds)
    ret_copy_len = [5, 10, 165]

    num_inputs = len(data_config.source_cols)
    decoder_cell = AttentionDecoderCell(
        target_token_embedder,
        2 * word_dim,  # 2 * word_dim because we concat base and copy vectors
        model_config.agenda_dim,
        model_config.hidden_dim,
        model_config.hidden_dim,
        model_config.attention_dim,
        num_layers=model_config.decoder_layers,
        num_inputs=num_inputs,
        dropout_prob=model_config.decoder_dropout_prob,
        disable_attention=True)

    vae_model = VAERetriever(source_token_embedder, encoder, decoder_cell,
                             ret_copy_len)
    ret_model = vae_model  # the retriever shares the VAE model
    vae_ret_model = EditRetriever(vae_model, ret_model, edit_model)
    vae_ret_model = try_gpu(vae_ret_model)

    optimizer = optim.Adam(vae_ret_model.parameters(),
                           lr=optim_config.learning_rate)
    return vae_ret_model, optimizer
def _create_model(self):
    config = self.config
    self.model = create_model(config)
    self.model = try_gpu(self.model)
def __init__(self, config, save_dir):
    super(MiniWoBTrainingRun, self).__init__(config, save_dir)
    self.workspace.add_dir('traces_replay', join('traces', 'replay'))
    self.workspace.add_file('traces_demo',
                            join('traces', 'demo-parse-log.txt'))

    # need to make sure that these times coincide
    assert config.log.trace_evaluate % config.log.evaluate == 0
    assert (config.log.explore % config.explore.program == 0 or
            config.log.explore % config.explore.neural == 0)
    assert config.log.replay % config.train.replay == 0
    assert config.log.trace_replay % config.log.replay == 0
    assert config.log.trace_explore % config.log.explore == 0

    # construct environment
    Episode.configure(config.discount_negative_reward)
    env = Environment.make(config.env.domain, config.env.subdomain)

    # TODO: Refactor into a get_environment
    env.configure(
        num_instances=config.env.num_instances,
        seeds=range(config.env.num_instances),
        headless=config.env.headless,
        base_url=os.environ.get("MINIWOB_BASE_URL"),
        cache_state=False,  # never cache state
        reward_processor=get_reward_processor(config.env.reward_processor),
        wait_ms=config.env.wait_ms,
        block_on_reset=config.env.block_on_reset,
        refresh_freq=config.env.refresh_freq,
    )
    self._env = env

    # construct episode generators
    self._basic_episode_generator = BasicEpisodeGenerator(
        self._env, config.explore.max_steps_per_episode,
        config.log.visualize_attention)

    def state_equality_checker(s1, s2):
        """Compare two State objects."""
        r1 = s1.dom.visualize() if s1 else None
        r2 = s2.dom.visualize() if s2 else None
        return r1 == r2  # TODO(kelvin): better equality check

    # construct episode logger
    trace_dir = join(self.workspace.root, 'traces')
    self._episode_logger = EpisodeLogger(trace_dir, self.tb_logger,
                                         self.metadata)

    # construct replay buffer
    # group episodes by query fields
    episode_grouper = lambda ep: frozenset(ep[0].state.fields.keys)
    episode_identifier = lambda ep: id(ep)
    # each group has its own buffer
    group_buffer_factory = lambda: RewardPrioritizedReplayBuffer(
        max_size=config.replay_buffer.size,
        sampling_quantile=1.0,
        discount_factor=config.gamma)
    # buffers are combined into a single grouped buffer
    self._replay_buffer = GroupedReplayBuffer(
        episode_grouper, episode_identifier, group_buffer_factory,
        min_group_size=config.replay_buffer.min_size)

    self._replay_steps = config.train.replay_steps
    self._gamma = config.gamma

    # construct replay logger
    self._replay_logger = ReplayLogger(self.workspace.traces_replay,
                                       self.tb_logger, self.metadata)

    # load demonstrations (assumes a codecs-style open where the third
    # positional argument is the encoding)
    with open(self.workspace.traces_demo, 'w', 'utf8') as fout:  # pylint: disable=no-member
        # NOTE: this may be an empty list for some tasks
        self._demonstrations = load_demonstrations(
            config.env.subdomain, config.demonstrations.base_dir,
            config.demonstrations.parser, logfile=fout,
            min_raw_reward=config.demonstrations.min_raw_reward)

    # keep a random subset of demonstrations
    with random_seed(0):
        random.shuffle(self._demonstrations)
    self._demonstrations = \
        self._demonstrations[:config.demonstrations.max_to_use]

    num_demonstrations = len(self._demonstrations)
    self.metadata['stats.num_demonstrations'] = num_demonstrations
    if num_demonstrations == 0:
        logging.warn('NO DEMONSTRATIONS AVAILABLE')

    # build neural policy
    neural_policy = try_gpu(MiniWoBPolicy.from_config(config.policy))
    optimizer = optim.Adam(neural_policy.parameters(),
                           lr=config.train.learning_rate)
    # TODO: reload replay buffer?
    self.train_state = self.checkpoints.load_latest(neural_policy, optimizer)

    # build program policy
    self._program_policy = self._build_program_policy()
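
# `random_seed(0)` above is a context manager that seeds Python's RNG for the
# shuffle and then restores the previous RNG state, so demonstration selection
# is reproducible without perturbing global randomness. A plausible sketch
# (the project's actual helper may differ):
import random
from contextlib import contextmanager

@contextmanager
def _random_seed_sketch(seed):
    state = random.getstate()
    random.seed(seed)
    try:
        yield
    finally:
        random.setstate(state)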