# Example no. 1 (scraped example-site marker, converted to a comment)
    def initialize(hparams, gen_data, *args, **kwargs):
        """Build the generator network, its optimizer, and the RNN state args.

        Returns a ``(model, optimizer, gen_data, rnn_args)`` tuple, where
        ``rnn_args`` describes how to create the recurrent hidden states.
        """
        gen_data.set_batch_size(hparams['batch_size'])

        d_model = hparams['d_model']
        num_layers = hparams['num_layers']
        has_stack = True

        # Embeddings provider; its padding-mask hook is shared with every
        # stack-RNN layer below.
        embedder = Encoder(vocab_size=gen_data.n_characters,
                           d_model=d_model,
                           padding_idx=gen_data.char2idx[gen_data.pad_symbol],
                           dropout=hparams['dropout'],
                           return_tuple=True)

        # Stack-augmented RNN layers; deeper nets get dropout + layer norm
        # appended after each layer.
        layers = []
        for layer_idx in range(1, num_layers + 1):
            layers.append(StackRNN(layer_index=layer_idx,
                                   input_size=d_model,
                                   hidden_size=d_model,
                                   has_stack=has_stack,
                                   unit_type=hparams['unit_type'],
                                   stack_width=hparams['stack_width'],
                                   stack_depth=hparams['stack_depth'],
                                   k_mask_func=embedder.k_padding_mask))
            if num_layers > 1:
                layers.append(StackedRNNDropout(hparams['dropout']))
                layers.append(StackedRNNLayerNorm(d_model))

        # Linear projection back to vocabulary logits.
        head = RNNLinearOut(out_dim=gen_data.n_characters,
                            hidden_size=d_model,
                            bidirectional=False,
                            bias=True)
        model = nn.Sequential(embedder, *layers, head)
        if use_cuda:
            model = model.cuda()

        optimizer = parse_optimizer(hparams, model)
        rnn_args = {'num_layers': num_layers,
                    'hidden_size': d_model,
                    'num_dir': 1,
                    'device': device,
                    'has_stack': has_stack,
                    'has_cell': hparams['unit_type'] == 'lstm',
                    'stack_width': hparams['stack_width'],
                    'stack_depth': hparams['stack_depth']}
        return model, optimizer, gen_data, rnn_args
    def initialize(hparams, demo_data_gen, unbiased_data_gen, prior_data_gen,
                   *args, **kwargs):
        """Assemble the IRL training entities: policy agent, REINFORCE
        algorithm, reward network, reward function, and guided-IRL trainer.

        Returns a dict of all constructed objects plus generation args.
        """
        agent_cfg = hparams['agent_params']
        reward_cfg = hparams['reward_params']
        d_model = hparams['d_model']
        has_stack = True

        # Embeddings provider shared by agent and reward networks.
        encoder = Encoder(
            vocab_size=demo_data_gen.n_characters,
            d_model=d_model,
            padding_idx=demo_data_gen.char2idx[demo_data_gen.pad_symbol],
            dropout=hparams['dropout'],
            return_tuple=True)

        # Agent network: stack-RNN layers (with dropout + layer norm for
        # deep nets) feeding a linear vocabulary head.
        rnn_layers = []
        for layer_idx in range(1, agent_cfg['num_layers'] + 1):
            rnn_layers.append(
                StackRNN(layer_index=layer_idx,
                         input_size=d_model,
                         hidden_size=d_model,
                         has_stack=has_stack,
                         unit_type=agent_cfg['unit_type'],
                         stack_width=agent_cfg['stack_width'],
                         stack_depth=agent_cfg['stack_depth'],
                         k_mask_func=encoder.k_padding_mask))
            if agent_cfg['num_layers'] > 1:
                rnn_layers.append(StackedRNNDropout(hparams['dropout']))
                rnn_layers.append(StackedRNNLayerNorm(d_model))
        agent_net = nn.Sequential(
            encoder, *rnn_layers,
            RNNLinearOut(out_dim=demo_data_gen.n_characters,
                         hidden_size=d_model,
                         bidirectional=False,
                         bias=True)).to(device)
        optimizer_agent_net = parse_optimizer(agent_cfg, agent_net)

        selector = MolEnvProbabilityActionSelector(
            actions=demo_data_gen.all_characters)
        probs_reg = StateActionProbRegistry()
        # Arguments for creating the agent's initial hidden states.
        init_state_args = {'num_layers': agent_cfg['num_layers'],
                           'hidden_size': d_model,
                           'stack_depth': agent_cfg['stack_depth'],
                           'stack_width': agent_cfg['stack_width'],
                           'unit_type': agent_cfg['unit_type']}
        agent = PolicyAgent(model=agent_net,
                            action_selector=selector,
                            states_preprocessor=seq2tensor,
                            initial_state=agent_net_hidden_states_func,
                            initial_state_args=init_state_args,
                            apply_softmax=True,
                            probs_registry=probs_reg,
                            device=device)
        drl_alg = REINFORCE(model=agent_net,
                            optimizer=optimizer_agent_net,
                            initial_states_func=agent_net_hidden_states_func,
                            initial_states_args=init_state_args,
                            prior_data_gen=prior_data_gen,
                            device=device,
                            xent_lambda=hparams['xent_lambda'],
                            gamma=hparams['gamma'],
                            grad_clipping=hparams['reinforce_max_norm'],
                            lr_decay_gamma=hparams['lr_decay_gamma'],
                            lr_decay_step=hparams['lr_decay_step_size'],
                            delayed_reward=not hparams['use_monte_carlo_sim'])

        # Reward-function entities.
        reward_net = nn.Sequential(
            encoder,
            RewardNetRNN(
                input_size=d_model,
                hidden_size=reward_cfg['d_model'],
                num_layers=reward_cfg['num_layers'],
                bidirectional=reward_cfg['bidirectional'],
                use_attention=reward_cfg['use_attention'],
                dropout=hparams['dropout'],
                unit_type=reward_cfg['unit_type'],
                use_smiles_validity_flag=reward_cfg['use_validity_flag']))
        reward_net = reward_net.to(device)

        expert_model = XGBPredictor(hparams['expert_model_dir'])
        # bias_mode selects whether the true-reward signal favors high or
        # low JAK2 predictions.
        if hparams['bias_mode'] == 'max':
            true_reward_func = get_jak2_max_reward
        else:
            true_reward_func = get_jak2_min_reward
        reward_function = RewardFunction(
            reward_net,
            mc_policy=agent,
            actions=demo_data_gen.all_characters,
            device=device,
            use_mc=hparams['use_monte_carlo_sim'],
            mc_max_sims=hparams['monte_carlo_N'],
            expert_func=expert_model,
            no_mc_fill_val=hparams['no_mc_fill_val'],
            true_reward_func=true_reward_func,
            use_true_reward=hparams['use_true_reward'])
        optimizer_reward_net = parse_optimizer(reward_cfg, reward_net)
        demo_data_gen.set_batch_size(reward_cfg['demo_batch_size'])
        irl_alg = GuidedRewardLearningIRL(
            reward_net,
            optimizer_reward_net,
            demo_data_gen,
            k=reward_cfg['irl_alg_num_iter'],
            agent_net=agent_net,
            agent_net_init_func=agent_net_hidden_states_func,
            agent_net_init_func_args=init_state_args,
            device=device)

        return {'agent': agent,
                'probs_reg': probs_reg,
                'drl_alg': drl_alg,
                'irl_alg': irl_alg,
                'reward_func': reward_function,
                'gamma': hparams['gamma'],
                'episodes_to_train': hparams['episodes_to_train'],
                'expert_model': expert_model,
                'demo_data_gen': demo_data_gen,
                'unbiased_data_gen': unbiased_data_gen,
                'gen_args': {'num_layers': agent_cfg['num_layers'],
                             'hidden_size': d_model,
                             'num_dir': 1,
                             'stack_depth': agent_cfg['stack_depth'],
                             'stack_width': agent_cfg['stack_width'],
                             'has_stack': has_stack,
                             'has_cell': agent_cfg['unit_type'] == 'lstm',
                             'device': device}}
# Example no. 3 (scraped example-site marker, converted to a comment)
def initialize(hparams, demo_data_gen, unbiased_data_gen, has_critic):
    """Build the evaluation entities: agent net, optional critic, and the
    reward network, all placed on ``device`` (agent/critic in eval mode).

    Returns a dict of the constructed networks plus the hidden-state and
    generation argument dicts.
    """
    agent_cfg = hparams['agent_params']
    reward_cfg = hparams['reward_params']
    d_model = hparams['d_model']
    has_stack = True

    # Embeddings provider shared by every network; eval mode disables its
    # dropout.
    encoder = Encoder(vocab_size=demo_data_gen.n_characters, d_model=d_model,
                      padding_idx=demo_data_gen.char2idx[demo_data_gen.pad_symbol],
                      dropout=hparams['dropout'], return_tuple=True).eval()

    # Agent: stack-RNN layers (dropout + layer norm added for deep nets)
    # followed by a linear vocabulary head.
    rnn_layers = []
    for layer_idx in range(1, agent_cfg['num_layers'] + 1):
        rnn_layers.append(StackRNN(layer_index=layer_idx,
                                   input_size=d_model,
                                   hidden_size=d_model,
                                   has_stack=has_stack,
                                   unit_type=agent_cfg['unit_type'],
                                   stack_width=agent_cfg['stack_width'],
                                   stack_depth=agent_cfg['stack_depth'],
                                   k_mask_func=encoder.k_padding_mask))
        if agent_cfg['num_layers'] > 1:
            rnn_layers.append(StackedRNNDropout(hparams['dropout']))
            rnn_layers.append(StackedRNNLayerNorm(d_model))
    head = RNNLinearOut(out_dim=demo_data_gen.n_characters,
                        hidden_size=d_model,
                        bidirectional=False,
                        bias=True)
    agent_net = nn.Sequential(encoder, *rnn_layers, head).to(device).eval()

    # Arguments for constructing the agent's initial hidden states.
    init_state_args = {'num_layers': agent_cfg['num_layers'],
                       'hidden_size': d_model,
                       'stack_depth': agent_cfg['stack_depth'],
                       'stack_width': agent_cfg['stack_width'],
                       'unit_type': agent_cfg['unit_type']}

    if not has_critic:
        critic = None
    else:
        critic_cfg = hparams['critic_params']
        critic = nn.Sequential(encoder,
                               CriticRNN(d_model, critic_cfg['d_model'],
                                         unit_type=critic_cfg['unit_type'],
                                         dropout=critic_cfg['dropout'],
                                         num_layers=critic_cfg['num_layers']))
        critic = critic.to(device).eval()

    # Reward-function entities.
    reward_net_rnn = RewardNetRNN(input_size=d_model,
                                  hidden_size=reward_cfg['d_model'],
                                  num_layers=reward_cfg['num_layers'],
                                  bidirectional=reward_cfg['bidirectional'],
                                  use_attention=reward_cfg['use_attention'],
                                  dropout=reward_cfg['dropout'],
                                  unit_type=reward_cfg['unit_type'],
                                  use_smiles_validity_flag=reward_cfg['use_validity_flag'])
    reward_net = nn.Sequential(encoder, reward_net_rnn).to(device)
    # expert_model = RNNPredictor(hparams['expert_model_params'], device)
    demo_data_gen.set_batch_size(reward_cfg['demo_batch_size'])

    return {'agent_net': agent_net,
            'critic_net': critic,
            'reward_net': reward_net,
            'reward_net_rnn': reward_net_rnn,
            'encoder': encoder.eval(),
            'gamma': hparams['gamma'],
            # 'expert_model': expert_model,
            'demo_data_gen': demo_data_gen,
            'unbiased_data_gen': unbiased_data_gen,
            'init_hidden_states_args': init_state_args,
            'gen_args': {'num_layers': agent_cfg['num_layers'],
                         'hidden_size': d_model,
                         'num_dir': 1,
                         'stack_depth': agent_cfg['stack_depth'],
                         'stack_width': agent_cfg['stack_width'],
                         'has_stack': has_stack,
                         'has_cell': agent_cfg['unit_type'] == 'lstm',
                         'device': device}}
    def initialize(hparams, data_gens, *args, **kwargs):
        """Build the generator model, its optimizer, and the generation args.

        ``data_gens`` must contain 'prior_data', 'demo_data' and
        'unbiased_data' generators; all get the configured batch size.
        Returns ``(model, optimizer, rnn_args)``.
        """
        for k in data_gens:
            data_gens[k].set_batch_size(hparams['batch_size'])
        gen_data = data_gens['prior_data']

        # Embeddings provider; its padding-mask hook is shared with the
        # stack-RNN layers below.
        encoder = Encoder(vocab_size=gen_data.n_characters,
                          d_model=hparams['d_model'],
                          padding_idx=gen_data.char2idx[gen_data.pad_symbol],
                          dropout=hparams['dropout'],
                          return_tuple=True)

        # Stack-augmented RNN layers; deeper nets get dropout + layer norm
        # appended after each layer.
        rnn_layers = []
        has_stack = True
        for i in range(1, hparams['num_layers'] + 1):
            rnn_layers.append(
                StackRNN(layer_index=i,
                         input_size=hparams['d_model'],
                         hidden_size=hparams['d_model'],
                         has_stack=has_stack,
                         unit_type=hparams['unit_type'],
                         stack_width=hparams['stack_width'],
                         stack_depth=hparams['stack_depth'],
                         k_mask_func=encoder.k_padding_mask))
            if hparams['num_layers'] > 1:
                rnn_layers.append(StackedRNNDropout(hparams['dropout']))
                rnn_layers.append(StackedRNNLayerNorm(hparams['d_model']))

        model = nn.Sequential(
            encoder,
            *rnn_layers,
            RNNLinearOut(
                out_dim=gen_data.n_characters,
                hidden_size=hparams['d_model'],
                bidirectional=False,
                bias=True))
        if use_cuda:
            model = model.cuda()
        optimizer = parse_optimizer(hparams, model)

        # BUG FIX: the original built a dict of fully-constructed predictors
        # and then `.get(...)` one of them, which eagerly instantiated (and
        # loaded model files for) ALL experts and raised KeyError when a
        # config key for an unrelated exp_type (e.g. 'drd2') was absent.
        # Use lazy factories so only the requested expert is constructed;
        # an unknown exp_type still yields None, as before.
        expert_factories = {
            'pretraining': lambda: DummyPredictor(),
            'drd2': lambda: RNNPredictor(hparams['drd2'], device, True),
            'logp': lambda: RNNPredictor(hparams['logp'], device),
            'jak2_max': lambda: XGBPredictor(hparams['jak2']),
            'jak2_min': lambda: XGBPredictor(hparams['jak2']),
        }
        factory = expert_factories.get(hparams['exp_type'])
        expert_model = factory() if factory is not None else None

        rnn_args = {
            'num_layers': hparams['num_layers'],
            'hidden_size': hparams['d_model'],
            'num_dir': 1,
            'device': device,
            'has_stack': has_stack,
            'has_cell': hparams['unit_type'] == 'lstm',
            'stack_width': hparams['stack_width'],
            'stack_depth': hparams['stack_depth'],
            'demo_data_gen': data_gens['demo_data'],
            'unbiased_data_gen': data_gens['unbiased_data'],
            'prior_data_gen': data_gens['prior_data'],
            'expert_model': expert_model,
            'exp_type': hparams['exp_type'],
        }
        return model, optimizer, rnn_args