def initialize(hparams, gen_data, *args, **kwargs):
    gen_data.set_batch_size(hparams['batch_size'])

    # Create main model
    encoder = Encoder(vocab_size=gen_data.n_characters,
                      d_model=hparams['d_model'],
                      padding_idx=gen_data.char2idx[gen_data.pad_symbol],
                      dropout=hparams['dropout'],
                      return_tuple=True)

    # Create RNN layers
    rnn_layers = []
    has_stack = True
    for i in range(1, hparams['num_layers'] + 1):
        rnn_layers.append(StackRNN(layer_index=i,
                                   input_size=hparams['d_model'],
                                   hidden_size=hparams['d_model'],
                                   has_stack=has_stack,
                                   unit_type=hparams['unit_type'],
                                   stack_width=hparams['stack_width'],
                                   stack_depth=hparams['stack_depth'],
                                   k_mask_func=encoder.k_padding_mask))
        if hparams['num_layers'] > 1:
            rnn_layers.append(StackedRNNDropout(hparams['dropout']))
            rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))

    model = nn.Sequential(encoder,
                          *rnn_layers,
                          RNNLinearOut(out_dim=gen_data.n_characters,
                                       hidden_size=hparams['d_model'],
                                       bidirectional=False,
                                       # encoder=encoder,
                                       # dropout=hparams['dropout'],
                                       bias=True))
    if use_cuda:
        model = model.cuda()
    optimizer = parse_optimizer(hparams, model)

    rnn_args = {'num_layers': hparams['num_layers'],
                'hidden_size': hparams['d_model'],
                'num_dir': 1,
                'device': device,
                'has_stack': has_stack,
                'has_cell': hparams['unit_type'] == 'lstm',
                'stack_width': hparams['stack_width'],
                'stack_depth': hparams['stack_depth']}
    return model, optimizer, gen_data, rnn_args
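# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original source). It shows the
# hyperparameter keys the pretraining initialize() above actually reads; the
# values below are hypothetical placeholders, and `gen_data` is assumed to be
# a generator object exposing set_batch_size, n_characters, char2idx and
# pad_symbol, as used above. parse_optimizer may require additional
# optimizer-related keys in hparams that are not shown here.
# ---------------------------------------------------------------------------
hparams_sketch = {
    'batch_size': 128,     # placeholder value
    'd_model': 1500,       # placeholder value
    'dropout': 0.0,        # placeholder value
    'num_layers': 2,       # placeholder value
    'unit_type': 'gru',    # 'lstm' would set has_cell=True in rnn_args
    'stack_width': 1500,   # placeholder value
    'stack_depth': 200,    # placeholder value
}
# gen_data = ...  # a generator instance; construction omitted here
model, optimizer, gen_data, rnn_args = initialize(hparams_sketch, gen_data)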
def initialize(hparams, demo_data_gen, unbiased_data_gen, prior_data_gen, *args, **kwargs):
    # Embeddings provider
    encoder = Encoder(vocab_size=demo_data_gen.n_characters,
                      d_model=hparams['d_model'],
                      padding_idx=demo_data_gen.char2idx[demo_data_gen.pad_symbol],
                      dropout=hparams['dropout'],
                      return_tuple=True)

    # Agent entities
    rnn_layers = []
    has_stack = True
    for i in range(1, hparams['agent_params']['num_layers'] + 1):
        rnn_layers.append(StackRNN(layer_index=i,
                                   input_size=hparams['d_model'],
                                   hidden_size=hparams['d_model'],
                                   has_stack=has_stack,
                                   unit_type=hparams['agent_params']['unit_type'],
                                   stack_width=hparams['agent_params']['stack_width'],
                                   stack_depth=hparams['agent_params']['stack_depth'],
                                   k_mask_func=encoder.k_padding_mask))
        if hparams['agent_params']['num_layers'] > 1:
            rnn_layers.append(StackedRNNDropout(hparams['dropout']))
            rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))

    agent_net = nn.Sequential(encoder,
                              *rnn_layers,
                              RNNLinearOut(out_dim=demo_data_gen.n_characters,
                                           hidden_size=hparams['d_model'],
                                           bidirectional=False,
                                           bias=True))
    agent_net = agent_net.to(device)
    optimizer_agent_net = parse_optimizer(hparams['agent_params'], agent_net)
    selector = MolEnvProbabilityActionSelector(actions=demo_data_gen.all_characters)
    probs_reg = StateActionProbRegistry()
    init_state_args = {'num_layers': hparams['agent_params']['num_layers'],
                       'hidden_size': hparams['d_model'],
                       'stack_depth': hparams['agent_params']['stack_depth'],
                       'stack_width': hparams['agent_params']['stack_width'],
                       'unit_type': hparams['agent_params']['unit_type']}
    agent = PolicyAgent(model=agent_net,
                        action_selector=selector,
                        states_preprocessor=seq2tensor,
                        initial_state=agent_net_hidden_states_func,
                        initial_state_args=init_state_args,
                        apply_softmax=True,
                        probs_registry=probs_reg,
                        device=device)
    drl_alg = REINFORCE(model=agent_net,
                        optimizer=optimizer_agent_net,
                        initial_states_func=agent_net_hidden_states_func,
                        initial_states_args=init_state_args,
                        prior_data_gen=prior_data_gen,
                        device=device,
                        xent_lambda=hparams['xent_lambda'],
                        gamma=hparams['gamma'],
                        grad_clipping=hparams['reinforce_max_norm'],
                        lr_decay_gamma=hparams['lr_decay_gamma'],
                        lr_decay_step=hparams['lr_decay_step_size'],
                        delayed_reward=not hparams['use_monte_carlo_sim'])

    # Reward function entities
    reward_net = nn.Sequential(encoder,
                               RewardNetRNN(input_size=hparams['d_model'],
                                            hidden_size=hparams['reward_params']['d_model'],
                                            num_layers=hparams['reward_params']['num_layers'],
                                            bidirectional=hparams['reward_params']['bidirectional'],
                                            use_attention=hparams['reward_params']['use_attention'],
                                            dropout=hparams['dropout'],
                                            unit_type=hparams['reward_params']['unit_type'],
                                            use_smiles_validity_flag=hparams['reward_params']['use_validity_flag']))
    reward_net = reward_net.to(device)
    expert_model = XGBPredictor(hparams['expert_model_dir'])
    true_reward_func = get_jak2_max_reward if hparams['bias_mode'] == 'max' else get_jak2_min_reward
    reward_function = RewardFunction(reward_net,
                                     mc_policy=agent,
                                     actions=demo_data_gen.all_characters,
                                     device=device,
                                     use_mc=hparams['use_monte_carlo_sim'],
                                     mc_max_sims=hparams['monte_carlo_N'],
                                     expert_func=expert_model,
                                     no_mc_fill_val=hparams['no_mc_fill_val'],
                                     true_reward_func=true_reward_func,
                                     use_true_reward=hparams['use_true_reward'])
    optimizer_reward_net = parse_optimizer(hparams['reward_params'], reward_net)
    demo_data_gen.set_batch_size(hparams['reward_params']['demo_batch_size'])
    irl_alg = GuidedRewardLearningIRL(reward_net,
                                      optimizer_reward_net,
                                      demo_data_gen,
                                      k=hparams['reward_params']['irl_alg_num_iter'],
                                      agent_net=agent_net,
                                      agent_net_init_func=agent_net_hidden_states_func,
                                      agent_net_init_func_args=init_state_args,
                                      device=device)

    init_args = {'agent': agent,
                 'probs_reg': probs_reg,
                 'drl_alg': drl_alg,
                 'irl_alg': irl_alg,
                 'reward_func': reward_function,
                 'gamma': hparams['gamma'],
                 'episodes_to_train': hparams['episodes_to_train'],
                 'expert_model': expert_model,
                 'demo_data_gen': demo_data_gen,
                 'unbiased_data_gen': unbiased_data_gen,
                 'gen_args': {'num_layers': hparams['agent_params']['num_layers'],
                              'hidden_size': hparams['d_model'],
                              'num_dir': 1,
                              'stack_depth': hparams['agent_params']['stack_depth'],
                              'stack_width': hparams['agent_params']['stack_width'],
                              'has_stack': has_stack,
                              'has_cell': hparams['agent_params']['unit_type'] == 'lstm',
                              'device': device}}
    return init_args
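# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original source). It lays out the
# nested hyperparameter structure this IRL/REINFORCE initialize() reads: only
# the key names are taken from the code above, and every value below is a
# hypothetical placeholder. The three data generators are assumed to expose
# the interface used above (n_characters, char2idx, pad_symbol,
# all_characters, set_batch_size). parse_optimizer may need extra optimizer
# keys inside 'agent_params' and 'reward_params' that are not shown.
# ---------------------------------------------------------------------------
hparams_sketch = {
    'd_model': 1500, 'dropout': 0.0,
    'gamma': 0.97, 'xent_lambda': 0.3,
    'reinforce_max_norm': 5.0,
    'lr_decay_gamma': 0.1, 'lr_decay_step_size': 1000,
    'use_monte_carlo_sim': True, 'monte_carlo_N': 5, 'no_mc_fill_val': 0.0,
    'use_true_reward': False,
    'episodes_to_train': 10,
    'bias_mode': 'max',                 # 'max' -> get_jak2_max_reward, else min
    'expert_model_dir': './expert_dir', # path placeholder for XGBPredictor
    'agent_params': {'num_layers': 2, 'unit_type': 'gru',
                     'stack_width': 1500, 'stack_depth': 200},
    'reward_params': {'d_model': 512, 'num_layers': 2, 'bidirectional': True,
                      'use_attention': True, 'unit_type': 'gru',
                      'use_validity_flag': True, 'demo_batch_size': 32,
                      'irl_alg_num_iter': 5},
}
# demo_data_gen, unbiased_data_gen, prior_data_gen = ...  # construction omitted
init_args = initialize(hparams_sketch, demo_data_gen,
                       unbiased_data_gen, prior_data_gen)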
def initialize(hparams, demo_data_gen, unbiased_data_gen, has_critic):
    # Embeddings provider
    encoder = Encoder(vocab_size=demo_data_gen.n_characters,
                      d_model=hparams['d_model'],
                      padding_idx=demo_data_gen.char2idx[demo_data_gen.pad_symbol],
                      dropout=hparams['dropout'],
                      return_tuple=True).eval()

    # Agent entities
    rnn_layers = []
    has_stack = True
    for i in range(1, hparams['agent_params']['num_layers'] + 1):
        rnn_layers.append(StackRNN(layer_index=i,
                                   input_size=hparams['d_model'],
                                   hidden_size=hparams['d_model'],
                                   has_stack=has_stack,
                                   unit_type=hparams['agent_params']['unit_type'],
                                   stack_width=hparams['agent_params']['stack_width'],
                                   stack_depth=hparams['agent_params']['stack_depth'],
                                   k_mask_func=encoder.k_padding_mask))
        if hparams['agent_params']['num_layers'] > 1:
            rnn_layers.append(StackedRNNDropout(hparams['dropout']))
            rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))

    agent_net = nn.Sequential(encoder,
                              *rnn_layers,
                              RNNLinearOut(out_dim=demo_data_gen.n_characters,
                                           hidden_size=hparams['d_model'],
                                           bidirectional=False,
                                           bias=True))
    agent_net = agent_net.to(device).eval()

    init_state_args = {'num_layers': hparams['agent_params']['num_layers'],
                       'hidden_size': hparams['d_model'],
                       'stack_depth': hparams['agent_params']['stack_depth'],
                       'stack_width': hparams['agent_params']['stack_width'],
                       'unit_type': hparams['agent_params']['unit_type']}

    if has_critic:
        critic = nn.Sequential(encoder,
                               CriticRNN(hparams['d_model'],
                                         hparams['critic_params']['d_model'],
                                         unit_type=hparams['critic_params']['unit_type'],
                                         dropout=hparams['critic_params']['dropout'],
                                         num_layers=hparams['critic_params']['num_layers']))
        critic = critic.to(device).eval()
    else:
        critic = None

    # Reward function entities
    reward_net_rnn = RewardNetRNN(input_size=hparams['d_model'],
                                  hidden_size=hparams['reward_params']['d_model'],
                                  num_layers=hparams['reward_params']['num_layers'],
                                  bidirectional=hparams['reward_params']['bidirectional'],
                                  use_attention=hparams['reward_params']['use_attention'],
                                  dropout=hparams['reward_params']['dropout'],
                                  unit_type=hparams['reward_params']['unit_type'],
                                  use_smiles_validity_flag=hparams['reward_params']['use_validity_flag'])
    reward_net = nn.Sequential(encoder, reward_net_rnn)
    reward_net = reward_net.to(device)
    # expert_model = RNNPredictor(hparams['expert_model_params'], device)
    demo_data_gen.set_batch_size(hparams['reward_params']['demo_batch_size'])

    init_args = {'agent_net': agent_net,
                 'critic_net': critic,
                 'reward_net': reward_net,
                 'reward_net_rnn': reward_net_rnn,
                 'encoder': encoder.eval(),
                 'gamma': hparams['gamma'],
                 # 'expert_model': expert_model,
                 'demo_data_gen': demo_data_gen,
                 'unbiased_data_gen': unbiased_data_gen,
                 'init_hidden_states_args': init_state_args,
                 'gen_args': {'num_layers': hparams['agent_params']['num_layers'],
                              'hidden_size': hparams['d_model'],
                              'num_dir': 1,
                              'stack_depth': hparams['agent_params']['stack_depth'],
                              'stack_width': hparams['agent_params']['stack_width'],
                              'has_stack': has_stack,
                              'has_cell': hparams['agent_params']['unit_type'] == 'lstm',
                              'device': device}}
    return init_args
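# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original source). It shows the
# hyperparameter keys this evaluation-oriented initialize() reads, including
# the optional 'critic_params' group used only when has_critic is True. All
# values are hypothetical placeholders; the generators are assumed to match
# the interface used above.
# ---------------------------------------------------------------------------
hparams_sketch = {
    'd_model': 1500, 'dropout': 0.0, 'gamma': 0.97,   # placeholder values
    'agent_params': {'num_layers': 2, 'unit_type': 'gru',
                     'stack_width': 1500, 'stack_depth': 200},
    'critic_params': {'d_model': 256, 'unit_type': 'gru',
                      'dropout': 0.2, 'num_layers': 2},
    'reward_params': {'d_model': 512, 'num_layers': 2, 'bidirectional': True,
                      'use_attention': True, 'dropout': 0.2, 'unit_type': 'gru',
                      'use_validity_flag': True, 'demo_batch_size': 32},
}
# demo_data_gen, unbiased_data_gen = ...  # construction omitted
init_args = initialize(hparams_sketch, demo_data_gen, unbiased_data_gen,
                       has_critic=True)   # has_critic=False -> critic_net is None
agent_net = init_args['agent_net']        # encoder, agent_net and critic are in eval() mode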
def initialize(hparams, data_gens, *args, **kwargs):
    for k in data_gens:
        data_gens[k].set_batch_size(hparams['batch_size'])
    gen_data = data_gens['prior_data']

    # Create main model
    encoder = Encoder(vocab_size=gen_data.n_characters,
                      d_model=hparams['d_model'],
                      padding_idx=gen_data.char2idx[gen_data.pad_symbol],
                      dropout=hparams['dropout'],
                      return_tuple=True)

    # Create RNN layers
    rnn_layers = []
    has_stack = True
    for i in range(1, hparams['num_layers'] + 1):
        rnn_layers.append(StackRNN(layer_index=i,
                                   input_size=hparams['d_model'],
                                   hidden_size=hparams['d_model'],
                                   has_stack=has_stack,
                                   unit_type=hparams['unit_type'],
                                   stack_width=hparams['stack_width'],
                                   stack_depth=hparams['stack_depth'],
                                   k_mask_func=encoder.k_padding_mask))
        if hparams['num_layers'] > 1:
            rnn_layers.append(StackedRNNDropout(hparams['dropout']))
            rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))

    model = nn.Sequential(encoder,
                          *rnn_layers,
                          RNNLinearOut(out_dim=gen_data.n_characters,
                                       hidden_size=hparams['d_model'],
                                       bidirectional=False,
                                       # encoder=encoder,
                                       # dropout=hparams['dropout'],
                                       bias=True))
    if use_cuda:
        model = model.cuda()
    optimizer = parse_optimizer(hparams, model)

    rnn_args = {'num_layers': hparams['num_layers'],
                'hidden_size': hparams['d_model'],
                'num_dir': 1,
                'device': device,
                'has_stack': has_stack,
                'has_cell': hparams['unit_type'] == 'lstm',
                'stack_width': hparams['stack_width'],
                'stack_depth': hparams['stack_depth'],
                'demo_data_gen': data_gens['demo_data'],
                'unbiased_data_gen': data_gens['unbiased_data'],
                'prior_data_gen': data_gens['prior_data'],
                'expert_model': {'pretraining': DummyPredictor(),
                                 'drd2': RNNPredictor(hparams['drd2'], device, True),
                                 'logp': RNNPredictor(hparams['logp'], device),
                                 'jak2_max': XGBPredictor(hparams['jak2']),
                                 'jak2_min': XGBPredictor(hparams['jak2'])}.get(hparams['exp_type']),
                'exp_type': hparams['exp_type']}
    return model, optimizer, rnn_args
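# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original source). It shows the
# data_gens mapping and the hyperparameter keys this multi-generator
# initialize() reads. Key names come from the lookups above; the generator
# objects and model paths are placeholders. Note that the function above
# builds every predictor in the expert-model dict before .get() selects the
# one matching 'exp_type' ('pretraining', 'drd2', 'logp', 'jak2_max' or
# 'jak2_min'; any other value yields expert_model=None), so all of the
# 'drd2', 'logp' and 'jak2' paths must be present in hparams.
# ---------------------------------------------------------------------------
data_gens_sketch = {
    'prior_data': prior_gen,        # sizes the vocabulary / padding index
    'demo_data': demo_gen,          # returned in rnn_args as demo_data_gen
    'unbiased_data': unbiased_gen,  # returned in rnn_args as unbiased_data_gen
}
hparams_sketch = {
    'batch_size': 128, 'd_model': 1500, 'dropout': 0.0,   # placeholder values
    'num_layers': 2, 'unit_type': 'gru',
    'stack_width': 1500, 'stack_depth': 200,
    'exp_type': 'drd2',
    'drd2': './drd2_predictor_dir',    # path placeholders for the predictors
    'logp': './logp_predictor_dir',
    'jak2': './jak2_xgb_dir',
}
model, optimizer, rnn_args = initialize(hparams_sketch, data_gens_sketch)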